gcc/gcc/config/aarch64/aarch64-simd.md

;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2022 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.

(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
        (match_operand:VALL_F16 1 "general_operand"))]
"TARGET_SIMD"
"
/* Force the operand into a register if it is not an
immediate whose use can be replaced with xzr.
If the mode is 16 bytes wide, then we will be doing
a stp in DI mode, so we check the validity of that.
If the mode is 8 bytes wide, then we will do doing a
normal str, so the check need not apply. */
if (GET_CODE (operands[0]) == MEM
&& !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
&& ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
&& aarch64_mem_pair_operand (operands[0], DImode))
               || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  /* If a constant is too complex to force to memory (e.g. because it
     contains CONST_POLY_INTs), build it up from individual elements instead.
     We should only need to do this before RA; aarch64_legitimate_constant_p
     should ensure that we don't try to rematerialize the constant later.  */
  if (GET_CODE (operands[1]) == CONST_VECTOR
      && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
    {
      aarch64_expand_vector_init (operands[0], operands[1]);
      DONE;
    }
"
)
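
;; A hedged illustration of the zero-store special case above (ours,
;; not part of this file): because a store of an all-zero vector can
;; reuse xzr, C source such as
;;
;;   #include <arm_neon.h>
;;   void store_zero_q (int32x4_t *p) { *p = vdupq_n_s32 (0); }  /* 16 bytes */
;;   void store_zero_d (int32x2_t *p) { *p = vdup_n_s32 (0); }   /*  8 bytes */
;;
;; can assemble to "stp xzr, xzr, [x0]" and "str xzr, [x0]"
;; respectively, rather than first materializing zero in a SIMD
;; register (the exact output depends on target and options).
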
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
        (match_operand:VALL_F16 1 "general_operand"))]
  "TARGET_SIMD && !STRICT_ALIGNMENT"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
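
;; A hedged sketch of the "memory := constant" case mentioned above
;; (ours, not from this file): the auto-vectorizer can turn a splat
;; loop over a pointer of unknown alignment, e.g.
;;
;;   void splat (int *p)
;;   {
;;     for (int i = 0; i < 4; i++)
;;       p[i] = 42;        /* may become: store {42,42,42,42} -> *p */
;;   }
;;
;; into a misaligned store of a constant vector, reaching this expander
;; with both operands as non-registers; operand 1 is then forced into a
;; register so that expansion cannot fail.
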
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand" "w,?r")))]
"TARGET_SIMD"
"@
dup\\t%0.<Vtype>, %1.<Vetype>[0]
dup\\t%0.<Vtype>, %<vwcore>1"
[(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w,w")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand" "w,r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vwcore>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)
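
;; The two alternatives of aarch64_simd_dup<mode> duplicate either from
;; a SIMD lane or from a general register.  A hedged intrinsics-level
;; illustration (function names are ours; exact output may vary):
;;
;;   #include <arm_neon.h>
;;   int32x4_t dup_gp (int x)     { return vdupq_n_s32 (x); }
;;     /* x arrives in w0:   dup  v0.4s, w0       */
;;   float32x4_t dup_fp (float x) { return vdupq_n_f32 (x); }
;;     /* x arrives in s0:   dup  v0.4s, v0.s[0]  */
;;
;; The integer variant's "?r" constraint keeps the general-register form
;; available but slightly disparaged, since a GP-to-SIMD transfer is
;; typically more expensive than an in-register dup.
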
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
}
  [(set_attr "type" "neon_dup<q>")]
)
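
;; aarch64_endian_lane_rtx is needed because GCC numbers vector lanes in
;; memory order while the instruction encoding numbers them in register
;; order; the two differ on big-endian.  A hedged sketch of the mapping
;; (helper name and form are illustrative, not GCC's implementation):
;;
;;   static inline unsigned
;;   endian_lane (unsigned nunits, unsigned n, bool big_endian)
;;   {
;;     return big_endian ? nunits - 1 - n : n;  /* identity on LE */
;;   }
;;
;; so, for example, lane 0 of a V4SI value is architectural lane 3 when
;; compiling for big-endian.
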
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
"TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
}
  [(set_attr "type" "neon_dup<q>")]
)
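
;; The <vswap_width_name> variant reads its lane from a vector of the
;; opposite width: a Q-register source for a D-sized result and vice
;; versa.  A hedged intrinsics-level illustration (ours):
;;
;;   #include <arm_neon.h>
;;   int32x2_t dup_from_q (int32x4_t v) { return vdup_laneq_s32 (v, 1); }
;;     /* dup  v0.2s, v0.s[1] */
;;
;; Note that the lane index is remapped relative to the *source* mode,
;; hence the <VSWAP_WIDTH>mode argument to aarch64_endian_lane_rtx above.
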
(define_insn "*aarch64_simd_mov<VDMOV:mode>"
  [(set (match_operand:VDMOV 0 "nonimmediate_operand"
          "=w, m, m, w, ?r, ?w, ?r, w")
(match_operand:VDMOV 1 "general_operand"
"m, Dz, w, w, w, r, r, Dn"))]
"TARGET_SIMD
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
switch (which_alternative)
{
case 0: return "ldr\t%d0, %1";
case 1: return "str\txzr, %0";
case 2: return "str\t%d1, %0";
case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
case 4: return "umov\t%0, %1.d[0]";
case 5: return "fmov\t%d0, %1";
case 6: return "mov\t%0, %1";
case 7:
return aarch64_output_simd_mov_immediate (operands[1], 64);
default: gcc_unreachable ();
}
}
[(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
neon_logic<q>, neon_to_gp<q>, f_mcr,\
mov_reg, neon_move<q>")]
)
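;; Moves of 128-bit AdvSIMD vectors.  The alternatives that go through
;; general registers need two instructions each, so they return "#" and
;; are split after reload; the "length" attribute records 8 bytes for
;; those cases.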
(define_insn "*aarch64_simd_mov<VQMOV:mode>"
[(set (match_operand:VQMOV 0 "nonimmediate_operand"
"=w, Umn, m, w, ?r, ?w, ?r, w")
(match_operand:VQMOV 1 "general_operand"
"m, Dz, w, w, w, r, r, Dn"))]
"TARGET_SIMD
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
switch (which_alternative)
{
case 0:
return "ldr\t%q0, %1";
case 1:
return "stp\txzr, xzr, %0";
case 2:
return "str\t%q1, %0";
case 3:
return "mov\t%0.<Vbtype>, %1.<Vbtype>";
case 4:
case 5:
case 6:
return "#";
case 7:
return aarch64_output_simd_mov_immediate (operands[1], 128);
default:
gcc_unreachable ();
}
}
[(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
neon_logic<q>, multiple, multiple,\
multiple, neon_move<q>")
(set_attr "length" "4,4,4,4,8,8,8,4")]
)
;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.
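;; For example (illustrative, on little-endian), storing lane 0 of an
;; int32x4_t with vst1q_lane_s32 (p + 1, v, 0) can be emitted as a
;; single "str s0, [x0, 4]"; ST1 would not accept the immediate offset.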
(define_insn "aarch64_store_lane0<mode>"
[(set (match_operand:<VEL> 0 "memory_operand" "=m")
(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
(parallel [(match_operand 2 "const_int_operand" "n")])))]
"TARGET_SIMD
&& ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
"str\\t%<Vetype>1, %0"
[(set_attr "type" "neon_store1_1reg<q>")]
)
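;; Load a pair of D-registers with a single LDP.  The insn condition
;; requires the second address to equal the first plus the mode size
;; (8 bytes), so the two loads access consecutive memory.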
(define_insn "load_pair<DREG:mode><DREG2:mode>"
[(set (match_operand:DREG 0 "register_operand" "=w")
(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
(set (match_operand:DREG2 2 "register_operand" "=w")
(match_operand:DREG2 3 "memory_operand" "m"))]
"TARGET_SIMD
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (<DREG:MODE>mode)))"
"ldp\\t%d0, %d2, %z1"
[(set_attr "type" "neon_ldp")]
)
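;; Likewise, store a pair of D-registers with a single STP when the two
;; destination addresses are consecutive.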
(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
[(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
(match_operand:DREG 1 "register_operand" "w"))
(set (match_operand:DREG2 2 "memory_operand" "=m")
(match_operand:DREG2 3 "register_operand" "w"))]
"TARGET_SIMD
&& rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (<DREG:MODE>mode)))"
"stp\\t%d1, %d3, %z0"
[(set_attr "type" "neon_stp")]
)
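;; Q-register forms of the pair patterns: load two 128-bit vectors from
;; consecutive quadwords with a single LDP.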
(define_insn "load_pair<VQ:mode><VQ2:mode>"
[(set (match_operand:VQ 0 "register_operand" "=w")
(match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
(set (match_operand:VQ2 2 "register_operand" "=w")
(match_operand:VQ2 3 "memory_operand" "m"))]
"TARGET_SIMD
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (<VQ:MODE>mode)))"
"ldp\\t%q0, %q2, %z1"
[(set_attr "type" "neon_ldp_q")]
)
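;; Store two 128-bit vectors to consecutive quadwords with a single STP.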
(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
[(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
(match_operand:VQ 1 "register_operand" "w"))
(set (match_operand:VQ2 2 "memory_operand" "=m")
(match_operand:VQ2 3 "register_operand" "w"))]
"TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (<VQ:MODE>mode)))"
"stp\\t%q1, %q3, %z0"
[(set_attr "type" "neon_stp_q")]
)
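;; After reload, split a 128-bit vector move whose source and
;; destination are both general registers into two X-register (DImode)
;; moves.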
(define_split
[(set (match_operand:VQMOV 0 "register_operand" "")
(match_operand:VQMOV 1 "register_operand" ""))]
"TARGET_SIMD && reload_completed
&& GP_REGNUM_P (REGNO (operands[0]))
&& GP_REGNUM_P (REGNO (operands[1]))"
[(const_int 0)]
{
aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
DONE;
})
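;; After reload, a 128-bit vector move that crosses between the general
;; and FP register files cannot be done as a single instruction; split it
;; and let aarch64_split_simd_move emit the individual transfers.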
(define_split
[(set (match_operand:VQMOV 0 "register_operand" "")
(match_operand:VQMOV 1 "register_operand" ""))]
"TARGET_SIMD && reload_completed
&& ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
|| (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
[(const_int 0)]
{
aarch64_split_simd_move (operands[0], operands[1]);
DONE;
})
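;; Split a 128-bit vector move between the general and FP register files
;; into two 64-bit halves.  From general registers the vector is built up
;; half by half (a move of the low half, then an aarch64_combine with the
;; high half); towards general registers each half is extracted with a
;; vec_select.  For a hypothetical V4SI move from x0/x1 into v0, the
;; GP -> FP direction would amount to something like:
;;
;;	fmov	d0, x0
;;	ins	v0.d[1], x1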
(define_expand "@aarch64_split_simd_mov<mode>"
[(set (match_operand:VQMOV 0)
(match_operand:VQMOV 1))]
"TARGET_SIMD"
{
rtx dst = operands[0];
rtx src = operands[1];
if (GP_REGNUM_P (REGNO (src)))
{
rtx src_low_part = gen_lowpart (<VHALF>mode, src);
rtx src_high_part = gen_highpart (<VHALF>mode, src);
rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
emit_move_insn (dst_low_part, src_low_part);
emit_insn (gen_aarch64_combine<Vhalf> (dst, dst_low_part,
src_high_part));
}
else
{
rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
}
DONE;
}
)
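;; Extract one 64-bit half of a Q register.  Operand 2 is a PARALLEL of
;; ascending lane indices, so the same pattern serves both halves:
;; aarch64_simd_vect_par_cnst_half generates [0 ... n/2-1] for the low
;; half and [n/2 ... n-1] for the high half, as used below.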
(define_expand "aarch64_get_half<mode>"
[(set (match_operand:<VHALF> 0 "register_operand")
(vec_select:<VHALF>
(match_operand:VQMOV 1 "register_operand")
(match_operand 2 "ascending_int_parallel")))]
"TARGET_SIMD"
)
(define_expand "aarch64_get_low<mode>"
[(match_operand:<VHALF> 0 "register_operand")
(match_operand:VQMOV 1 "register_operand")]
"TARGET_SIMD"
{
rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
DONE;
}
)
(define_expand "aarch64_get_high<mode>"
[(match_operand:<VHALF> 0 "register_operand")
(match_operand:VQMOV 1 "register_operand")]
"TARGET_SIMD"
{
rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
DONE;
}
)
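;; Move one half of a Q register to a D register or to a general register.
;; Within the FP file the low half is just a subreg move, so that
;; alternative starts out as "#" and is split after reload; the other
;; alternatives use DUP or UMOV on the appropriate doubleword lane.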
(define_insn_and_split "aarch64_simd_mov_from_<mode>low"
[(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
(vec_select:<VHALF>
(match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
(match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
"TARGET_SIMD"
"@
#
umov\t%0, %1.d[0]"
"&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
[(set (match_dup 0) (match_dup 1))]
{
operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
}
[(set_attr "type" "mov_reg,neon_to_gp<q>")
(set_attr "length" "4")]
)
(define_insn "aarch64_simd_mov_from_<mode>high"
[(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
(vec_select:<VHALF>
(match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
(match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
"TARGET_SIMD"
"@
dup\\t%d0, %1.d[1]
umov\t%0, %1.d[1]"
[(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
(set_attr "length" "4")]
)
(define_insn "orn<mode>3"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
(match_operand:VDQ_I 2 "register_operand" "w")))]
"TARGET_SIMD"
"orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
[(set_attr "type" "neon_logic<q>")]
)
(define_insn "bic<mode>3"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
(match_operand:VDQ_I 2 "register_operand" "w")))]
"TARGET_SIMD"
"bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
[(set_attr "type" "neon_logic<q>")]
)
(define_insn "add<mode>3"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "register_operand" "w")))]
"TARGET_SIMD"
"add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_add<q>")]
)
(define_insn "sub<mode>3"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "register_operand" "w")))]
"TARGET_SIMD"
"sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_sub<q>")]
)
(define_insn "mul<mode>3"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
(match_operand:VDQ_BHSI 2 "register_operand" "w")))]
"TARGET_SIMD"
"mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_mul_<Vetype><q>")]
)
(define_insn "bswap<mode>2"
[(set (match_operand:VDQHSD 0 "register_operand" "=w")
(bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
"TARGET_SIMD"
"rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
[(set_attr "type" "neon_rev<q>")]
)
(define_insn "aarch64_rbit<mode>"
[(set (match_operand:VB 0 "register_operand" "=w")
(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
UNSPEC_RBIT))]
"TARGET_SIMD"
"rbit\\t%0.<Vbtype>, %1.<Vbtype>"
[(set_attr "type" "neon_rbit")]
)
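;; There is no vector CTZ instruction, so count trailing zeros as a full
;; bit reversal followed by CLZ.  The reversal is done in two steps: BSWAP
;; reverses the bytes within each element, then RBIT (on the vector viewed
;; as bytes) reverses the bits within each byte.  Per element:
;;
;;	ctz (x) == clz (rbit_per_byte (bswap (x)))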
(define_expand "ctz<mode>2"
[(set (match_operand:VS 0 "register_operand")
(ctz:VS (match_operand:VS 1 "register_operand")))]
"TARGET_SIMD"
{
emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
  rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
<MODE>mode, 0);
emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
DONE;
}
)
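;; xorsign (x, y) yields x with its sign bit XORed with the sign bit of y.
;; Implement it entirely in the vector registers by masking out everything
;; but the sign bit of operand 2 and XORing that into operand 1, i.e. per
;; element:
;;
;;	dst = op1 ^ (op2 & SIGN_BIT_MASK)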
(define_expand "xorsign<mode>3"
[(match_operand:VHSDF 0 "register_operand")
(match_operand:VHSDF 1 "register_operand")
(match_operand:VHSDF 2 "register_operand")]
"TARGET_SIMD"
{
machine_mode imode = <V_INT_EQUIV>mode;
rtx v_bitmask = gen_reg_rtx (imode);
rtx op1x = gen_reg_rtx (imode);
rtx op2x = gen_reg_rtx (imode);
rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
emit_move_insn (v_bitmask,
aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
HOST_WIDE_INT_M1U << bits));
emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
emit_move_insn (operands[0],
lowpart_subreg (<MODE>mode, op1x, imode));
DONE;
}
)
;; The fcadd and fcmla patterns are made UNSPEC explicitly because their
;; usage needs to guarantee that the source vectors are contiguous. It would
;; be wrong to describe the operation without being able to describe the
;; permute that is also required, but even if that were done the permute
;; would have been created as a LOAD_LANES, which means the values in the
;; registers would be in the wrong order.
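;; As an informal sketch of the semantics (not the Arm ARM pseudocode),
;; FCADD with rotation #90 computes, for each complex element pair:
;;
;;   res[2i]   = a[2i]   - b[2i+1];   /* re = a.re - b.im */
;;   res[2i+1] = a[2i+1] + b[2i];     /* im = a.im + b.re */
;;
;; while rotation #270 flips the signs of the b terms, so the two
;; rotations compute a + i*b and a - i*b respectively.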
(define_insn "aarch64_fcadd<rot><mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")]
FCADD))]
"TARGET_COMPLEX"
"fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
[(set_attr "type" "neon_fcadd")]
)
(define_expand "cadd<rot><mode>3"
[(set (match_operand:VHSDF 0 "register_operand")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
(match_operand:VHSDF 2 "register_operand")]
FCADD))]
"TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
)
(define_insn "aarch64_fcmla<rot><mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
(unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
(match_operand:VHSDF 3 "register_operand" "w")]
FCMLA)))]
"TARGET_COMPLEX"
"fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
[(set_attr "type" "neon_fcmla")]
)
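;; Informally, each FCMLA rotation accumulates one half of the complex
;; product (a sketch, not the Arm ARM pseudocode):
;;
;;   #0:   d[2i] += n[2i]   * m[2i];     d[2i+1] += n[2i]   * m[2i+1];
;;   #90:  d[2i] -= n[2i+1] * m[2i+1];   d[2i+1] += n[2i+1] * m[2i];
;;
;; so issuing #0 and then #90 on the same accumulator performs the full
;; complex multiply-accumulate d += n * m; the cml* and cmul* expanders
;; below rely on exactly this pairing.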
(define_insn "aarch64_fcmla_lane<rot><mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
(unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
(match_operand:VHSDF 3 "register_operand" "w")
(match_operand:SI 4 "const_int_operand" "n")]
FCMLA)))]
"TARGET_COMPLEX"
{
operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
[(set_attr "type" "neon_fcmla")]
)
(define_insn "aarch64_fcmla_laneq<rot>v4hf"
[(set (match_operand:V4HF 0 "register_operand" "=w")
(plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
(unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
(match_operand:V8HF 3 "register_operand" "w")
(match_operand:SI 4 "const_int_operand" "n")]
FCMLA)))]
"TARGET_COMPLEX"
{
operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
[(set_attr "type" "neon_fcmla")]
)
(define_insn "aarch64_fcmlaq_lane<rot><mode>"
[(set (match_operand:VQ_HSF 0 "register_operand" "=w")
(plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
(unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
(match_operand:<VHALF> 3 "register_operand" "w")
(match_operand:SI 4 "const_int_operand" "n")]
FCMLA)))]
"TARGET_COMPLEX"
{
int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
operands[4]
= gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
[(set_attr "type" "neon_fcmla")]
)
;; The complex mla/mls operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder. Because of this, expand early.
(define_expand "cml<fcmac1><conj_op><mode>4"
[(set (match_operand:VHSDF 0 "register_operand")
(plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
(match_operand:VHSDF 2 "register_operand")]
FCMLA_OP)
(match_operand:VHSDF 3 "register_operand")))]
"TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
{
rtx tmp = gen_reg_rtx (<MODE>mode);
emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3],
operands[2], operands[1]));
emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
operands[2], operands[1]));
DONE;
})
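;; As an illustration, a non-conjugated V4SF multiply-accumulate
;; c += a * b is expected to expand to a sequence along the lines of
;; (assembly sketch):
;;
;;   fcmla   v0.4s, v1.4s, v2.4s, #0
;;   fcmla   v0.4s, v1.4s, v2.4s, #90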
;; The complex mul operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder. Because of this, expand early.
(define_expand "cmul<conj_op><mode>3"
[(set (match_operand:VHSDF 0 "register_operand")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
(match_operand:VHSDF 2 "register_operand")]
FCMUL_OP))]
"TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
{
rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
rtx res1 = gen_reg_rtx (<MODE>mode);
emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
operands[2], operands[1]));
emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
operands[2], operands[1]));
DONE;
})
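;; A plain complex multiply has no accumulator to add into, so the
;; expansion above zeroes a temporary first; the expected result is a
;; three-instruction sequence along the lines of (assembly sketch):
;;
;;   movi    v0.4s, #0
;;   fcmla   v0.4s, v1.4s, v2.4s, #0
;;   fcmla   v0.4s, v1.4s, v2.4s, #90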
;; These patterns map to the Dot Product optab that the vectorizer checks
;; for and to the intrinsics patterns.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;; c = a[i] * b[i];
;; r += c;
;; }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations. However, the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
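;; As a usage sketch, the ACLE dot product intrinsics reach this pattern
;; through the __builtins, e.g. with the +dotprod extension:
;;
;;   uint32x2_t res = vdot_u32 (r, a, b);   /* uint8x8_t a, b */
;;
;; where each 32-bit lane of res accumulates the corresponding lane of r
;; plus the sum of four adjacent 8-bit products of a and b.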
(define_insn "<sur>dot_prod<vsi2qi>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
(match_operand:<VSI2QI> 2 "register_operand" "w")]
DOTPROD)
(match_operand:VS 3 "register_operand" "0")))]
"TARGET_DOTPROD"
"<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
[(set_attr "type" "neon_dot<q>")]
)
;; These instructions map to the __builtins for the Armv8.6-A I8MM usdot
;; (vector) Dot Product operation and the vectorized optab.
(define_insn "usdot_prod<vsi2qi>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
(match_operand:<VSI2QI> 2 "register_operand" "w")]
UNSPEC_USDOT)
(match_operand:VS 3 "register_operand" "0")))]
"TARGET_I8MM"
"usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
[(set_attr "type" "neon_dot<q>")]
)
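;; A usage sketch for the mixed-signedness form (assuming the usual ACLE
;; spelling from the I8MM extension):
;;
;;   int32x2_t res = vusdot_s32 (r, a, b);   /* uint8x8_t a, int8x8_t b */
;;
;; i.e. the first multiplicand is treated as unsigned and the second as
;; signed.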
;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
(match_operand:V8QI 3 "register_operand" "<h_con>")
(match_operand:SI 4 "immediate_operand" "i")]
DOTPROD)
(match_operand:VS 1 "register_operand" "0")))]
"TARGET_DOTPROD"
{
operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
[(set_attr "type" "neon_dot<q>")]
)
(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
(match_operand:V16QI 3 "register_operand" "<h_con>")
(match_operand:SI 4 "immediate_operand" "i")]
DOTPROD)
(match_operand:VS 1 "register_operand" "0")))]
"TARGET_DOTPROD"
{
operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
[(set_attr "type" "neon_dot<q>")]
)
;; These instructions map to the __builtins for the Armv8.6-A I8MM usdot, sudot
;; (by element) Dot Product operations.
(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
(match_operand:VB 3 "register_operand" "w")
(match_operand:SI 4 "immediate_operand" "i")]
DOTPROD_I8MM)
(match_operand:VS 1 "register_operand" "0")))]
"TARGET_I8MM"
{
int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
int lane = INTVAL (operands[4]);
operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
}
[(set_attr "type" "neon_dot<VS:q>")]
)
(define_expand "copysign<mode>3"
[(match_operand:VHSDF 0 "register_operand")
(match_operand:VHSDF 1 "register_operand")
(match_operand:VHSDF 2 "register_operand")]
"TARGET_FLOAT && TARGET_SIMD"
{
rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
emit_move_insn (v_bitmask,
aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
HOST_WIDE_INT_M1U << bits));
emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
operands[2], operands[1]));
DONE;
}
)
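;; The expansion above is a lane-wise copysign: v_bitmask has only the
;; sign bit of each element set, and the BSL takes that bit from operand
;; 2 with everything else from operand 1. Per lane this is equivalent to
;; the scalar sketch:
;;
;;   res = (y & SIGN_MASK) | (x & ~SIGN_MASK);   /* copysignf (x, y) */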
(define_insn "mul_lane<mode>3"
[(set (match_operand:VMULD 0 "register_operand" "=w")
(mult:VMULD
(vec_duplicate:VMULD
(vec_select:<VEL>
(match_operand:<VCOND> 2 "register_operand" "<h_con>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
(match_operand:VMULD 1 "register_operand" "w")))]
"TARGET_SIMD"
{
operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
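;; A usage sketch: by-element intrinsics such as
;;
;;   float32x2_t res = vmul_lane_f32 (a, b, 1);
;;
;; match this pattern and emit a single by-element multiply, e.g.
;; fmul v0.2s, v1.2s, v2.s[1] (assembly sketch).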
(define_insn "mul_laneq<mode>3"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(mult:VMUL
(vec_duplicate:VMUL
(vec_select:<VEL>
(match_operand:<VCONQ> 2 "register_operand" "<h_con>")
(parallel [(match_operand:SI 3 "immediate_operand")])))
(match_operand:VMUL 1 "register_operand" "w")))]
"TARGET_SIMD"
{
operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
(define_insn "mul_n<mode>3"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(mult:VMUL
(vec_duplicate:VMUL
(match_operand:<VEL> 2 "register_operand" "<h_con>"))
(match_operand:VMUL 1 "register_operand" "w")))]
"TARGET_SIMD"
"<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
[(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
(define_insn "@aarch64_rsqrte<mode>"
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
UNSPEC_RSQRTE))]
"TARGET_SIMD"
"frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
(define_insn "@aarch64_rsqrts<mode>"
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
(match_operand:VHSDF_HSDF 2 "register_operand" "w")]
UNSPEC_RSQRTS))]
"TARGET_SIMD"
"frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
(define_expand "rsqrt<mode>2"
[(set (match_operand:VALLF 0 "register_operand")
(unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
UNSPEC_RSQRT))]
"TARGET_SIMD"
{
aarch64_emit_approx_sqrt (operands[0], operands[1], true);
DONE;
})
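;; As an illustration (a sketch, not the literal expansion),
;; aarch64_emit_approx_sqrt with the final argument true emits an
;; FRSQRTE estimate refined by Newton-Raphson steps; roughly, for V4SF:
;;   frsqrte v0.4s, v1.4s              // initial estimate e
;; then, per refinement step (the step count depends on the mode):
;;   fmul    v2.4s, v0.4s, v0.4s       // e * e
;;   frsqrts v2.4s, v1.4s, v2.4s       // (3 - x * e * e) / 2
;;   fmul    v0.4s, v0.4s, v2.4s       // refined e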
(define_insn "aarch64_ursqrte<mode>"
[(set (match_operand:VDQ_SI 0 "register_operand" "=w")
(unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
UNSPEC_RSQRTE))]
"TARGET_SIMD"
"ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
(define_insn "*aarch64_mul3_elt_to_64v2df"
[(set (match_operand:DF 0 "register_operand" "=w")
(mult:DF
(vec_select:DF
(match_operand:V2DF 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand")]))
(match_operand:DF 3 "register_operand" "w")))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
}
[(set_attr "type" "neon_fp_mul_d_scalar_q")]
)
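;; Note that aarch64_endian_lane_rtx remaps the GCC lane number to the
;; architectural lane number; the two differ on big-endian targets.
;; The multiply itself uses the by-element form, e.g. (illustrative only):
;;   fmul v0.2d, v3.2d, v1.d[1]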
(define_insn "neg<mode>2"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
"TARGET_SIMD"
"neg\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_neg<q>")]
)
(define_insn "abs<mode>2"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
"TARGET_SIMD"
"abs\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_abs<q>")]
)
;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
[(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
(unspec:VSDQ_I_DI
[(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
UNSPEC_ABS))]
"TARGET_SIMD"
"abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_abs<q>")]
)
;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate as ABS always treats its input as a signed value.
;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
;; Whereas SABD would return 192 (-64 signed) on the above example.
;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
(define_insn "aarch64_<su>abd<mode>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(minus:VDQ_BHSI
(USMAX:VDQ_BHSI
(match_operand:VDQ_BHSI 1 "register_operand" "w")
(match_operand:VDQ_BHSI 2 "register_operand" "w"))
(<max_opp>:VDQ_BHSI
(match_dup 1)
(match_dup 2))))]
"TARGET_SIMD"
"<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_abd<q>")]
)
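;; Working through the example above (a sketch of the reasoning, not
;; taken from the sources): for QImode inputs 64 and -128, smax = 64
;; and smin = -128, so the MINUS form computes 64 - -128 = 192, i.e.
;; -64 as a signed byte -- exactly the SABD result, where
;; ABS (MINUS ...) would have yielded 64.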
(define_insn "aarch64_<sur>abdl<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
(match_operand:VD_BHSI 2 "register_operand" "w")]
ABDL))]
"TARGET_SIMD"
"<sur>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_abd<q>")]
)
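;; For example (illustrative), "uabdl v0.8h, v1.8b, v2.8b" computes the
;; absolute difference of each pair of bytes and zero-extends the
;; result into the corresponding 16-bit lane of the destination.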
(define_insn "aarch64_<sur>abdl2<mode>"
[(set (match_operand:<VDBLW> 0 "register_operand" "=w")
(unspec:<VDBLW> [(match_operand:VQW 1 "register_operand" "w")
(match_operand:VQW 2 "register_operand" "w")]
ABDL2))]
"TARGET_SIMD"
"<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_abd<q>")]
)
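;; For example (illustrative), "uabdl2 v0.8h, v1.16b, v2.16b" is the
;; high-half variant: it takes the upper eight bytes of each 128-bit
;; source and widens the absolute differences into 16-bit lanes.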
(define_insn "aarch64_<sur>abal<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(unspec:<VWIDE> [(match_operand:VD_BHSI 2 "register_operand" "w")
(match_operand:VD_BHSI 3 "register_operand" "w")
(match_operand:<VWIDE> 1 "register_operand" "0")]
ABAL))]
"TARGET_SIMD"
"<sur>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
[(set_attr "type" "neon_arith_acc<q>")]
)
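;; For example (illustrative), "uabal v0.8h, v1.8b, v2.8b" accumulates
;; the widened absolute differences into the destination, i.e.
;; v0.8h[i] += abs (v1.8b[i] - v2.8b[i]); operand 1 is therefore tied
;; to the output with the "0" constraint.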
(define_insn "aarch64_<sur>abal2<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(unspec:<VWIDE> [(match_operand:VQW 2 "register_operand" "w")
(match_operand:VQW 3 "register_operand" "w")
(match_operand:<VWIDE> 1 "register_operand" "0")]
ABAL2))]
"TARGET_SIMD"
"<sur>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
[(set_attr "type" "neon_arith_acc<q>")]
)
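;; For example (illustrative), "uabal2 v0.8h, v1.16b, v2.16b" performs
;; the same accumulation using the upper eight bytes of each source.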
(define_insn "aarch64_<sur>adalp<mode>"
[(set (match_operand:<VDBLW> 0 "register_operand" "=w")
(unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w")
(match_operand:<VDBLW> 1 "register_operand" "0")]
ADALP))]
"TARGET_SIMD"
"<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>"
[(set_attr "type" "neon_reduc_add<q>")]
)
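;; For example (illustrative), "uadalp v0.4s, v1.8h" sums each pair of
;; adjacent 16-bit elements of the source and accumulates the widened
;; result into the corresponding 32-bit lane of the destination.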
;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2. The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2 tmp.8h, op1.16b, op2.16b
;; UABAL tmp.8h, op1.8b, op2.8b
;; UADALP op3.4s, tmp.8h
;; MOV op0, op3 // should be eliminated in later passes.
;;
;; For TARGET_DOTPROD we do:
;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
;; UABD tmp2.16b, op1.16b, op2.16b
;; UDOT op3.4s, tmp2.16b, tmp1.16b
;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
;;
;; The signed version just uses the signed variants of the above instructions
;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
;; unsigned.
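;;
;; For example, a straight-line SAD kernel such as:
;;
;;   unsigned char pix1[16], pix2[16];
;;   int foo (void)
;;   {
;;     int i_sum = 0;
;;     for (int i = 0; i < 16; i++)
;;       i_sum += __builtin_abs (pix1[i] - pix2[i]);
;;     return i_sum;
;;   }
;;
;; is expected to vectorize through this expander into (non-dotprod path):
;;
;;   uabdl2  v1.8h, v2.16b, v3.16b
;;   uabal   v1.8h, v2.8b, v3.8b
;;   uadalp  v0.4s, v1.8h
;;   addv    s0, v0.4s
;;   umov    w0, v0.s[0]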
(define_expand "<sur>sadv16qi"
[(use (match_operand:V4SI 0 "register_operand"))
(unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
(use (match_operand:V16QI 2 "register_operand"))] ABAL)
(use (match_operand:V4SI 3 "register_operand"))]
"TARGET_SIMD"
{
if (TARGET_DOTPROD)
{
rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
rtx abd = gen_reg_rtx (V16QImode);
emit_insn (gen_aarch64_<sur>abdv16qi (abd, operands[1], operands[2]));
emit_insn (gen_udot_prodv16qi (operands[0], abd, ones, operands[3]));
DONE;
}
rtx reduc = gen_reg_rtx (V8HImode);
emit_insn (gen_aarch64_<sur>abdl2v16qi (reduc, operands[1],
operands[2]));
emit_insn (gen_aarch64_<sur>abalv8qi (reduc, reduc,
gen_lowpart (V8QImode, operands[1]),
gen_lowpart (V8QImode,
operands[2])));
emit_insn (gen_aarch64_<sur>adalpv8hi (operands[3], operands[3], reduc));
emit_move_insn (operands[0], operands[3]);
DONE;
}
)
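;; Absolute difference and accumulate: op0 = op1 + ABS (op2 - op3).
;; There is no unsigned absolute-difference RTX code, so the difference is
;; represented as MAX (op2, op3) - MIN (op2, op3), which is correct for
;; both the signed (SABA) and unsigned (UABA) forms.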
(define_insn "aarch64_<su>aba<mode>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(plus:VDQ_BHSI (minus:VDQ_BHSI
(USMAX:VDQ_BHSI
(match_operand:VDQ_BHSI 2 "register_operand" "w")
(match_operand:VDQ_BHSI 3 "register_operand" "w"))
(<max_opp>:VDQ_BHSI
(match_dup 2)
(match_dup 3)))
(match_operand:VDQ_BHSI 1 "register_operand" "0")))]
"TARGET_SIMD"
"<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
[(set_attr "type" "neon_arith_acc<q>")]
)
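;; Floating-point absolute difference: FABD fuses the subtract and the
;; absolute value into a single instruction.  The VHSDF_HSDF iterator
;; covers the vector HF/SF/DF modes as well as the scalar HF/SF/DF forms.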
(define_insn "fabd<mode>3"
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
(abs:VHSDF_HSDF
(minus:VHSDF_HSDF
(match_operand:VHSDF_HSDF 1 "register_operand" "w")
(match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
"TARGET_SIMD"
"fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_fp_abd_<stype><q>")]
)
;; For AND (vector, register) and BIC (vector, immediate)
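;; Alternative 0 is the register-register AND; alternative 1 accepts a
;; Db-constrained constant and is output as BIC with the bitwise-inverse
;; immediate, so a mask such as per-lane ~0xff need not be loaded into a
;; register first.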
(define_insn "and<mode>3"
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
(match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
"TARGET_SIMD"
{
switch (which_alternative)
{
case 0:
return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
case 1:
return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
AARCH64_CHECK_BIC);
default:
gcc_unreachable ();
}
}
[(set_attr "type" "neon_logic<q>")]
)
;; For ORR (vector, register) and ORR (vector, immediate)
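;; This mirrors the AND/BIC pair above: alternative 1 takes a
;; Do-constrained constant and is output directly as ORR (vector,
;; immediate) via AARCH64_CHECK_ORR.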
(define_insn "ior<mode>3"
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
(match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
"TARGET_SIMD"
{
switch (which_alternative)
{
case 0:
return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
case 1:
return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
AARCH64_CHECK_ORR);
default:
gcc_unreachable ();
}
}
[(set_attr "type" "neon_logic<q>")]
)
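;; EOR (vector, register).  AdvSIMD has no EOR (vector, immediate)
;; encoding, so unlike AND and ORR only the register form exists here.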
(define_insn "xor<mode>3"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "register_operand" "w")))]
"TARGET_SIMD"
"eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
[(set_attr "type" "neon_logic<q>")]
)
(define_insn "one_cmpl<mode>2"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
"TARGET_SIMD"
"not\t%0.<Vbtype>, %1.<Vbtype>"
[(set_attr "type" "neon_logic<q>")]
)
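;; Set a single lane of a vector.  Operand 2 is an immediate with exactly
;; one bit set, selecting the lane through the vec_merge mask;
;; ENDIAN_LANE_N remaps the lane number for big-endian targets.  The
;; alternatives insert from another vector lane (INS), from a
;; general-purpose register (INS), or directly from memory (LD1).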
(define_insn "aarch64_simd_vec_set<mode>"
[(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
(vec_merge:VALL_F16
(vec_duplicate:VALL_F16
(match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
(match_operand:VALL_F16 3 "register_operand" "0,0,0")
(match_operand:SI 2 "immediate_operand" "i,i,i")))]
"TARGET_SIMD"
{
int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
switch (which_alternative)
{
case 0:
return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
case 1:
return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
case 2:
return "ld1\\t{%0.<Vetype>}[%p2], %1";
default:
gcc_unreachable ();
}
}
[(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)
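
;; Insert a single lane of operand 3 into operand 1 (which is otherwise
;; preserved) using an INS instruction.  Operand 2 is the vec_merge mask,
;; a power of two selecting the destination lane; operand 4 is the source
;; lane number.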
(define_insn "@aarch64_simd_vec_copy_lane<mode>"
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
(vec_merge:VALL_F16
(vec_duplicate:VALL_F16
(vec_select:<VEL>
(match_operand:VALL_F16 3 "register_operand" "w")
(parallel
[(match_operand:SI 4 "immediate_operand" "i")])))
(match_operand:VALL_F16 1 "register_operand" "0")
(match_operand:SI 2 "immediate_operand" "i")))]
"TARGET_SIMD"
{
int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_ins<q>")]
)
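
;; As above, but the source lane comes from a vector of the opposite width
;; (64-bit vs 128-bit).  INS indexes lanes within the full register, so
;; only the endian correction of the source lane number differs.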
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
[(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
(vec_merge:VALL_F16_NO_V2Q
(vec_duplicate:VALL_F16_NO_V2Q
(vec_select:<VEL>
(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
(parallel
[(match_operand:SI 4 "immediate_operand" "i")])))
(match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
(match_operand:SI 2 "immediate_operand" "i")))]
"TARGET_SIMD"
{
int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
INTVAL (operands[4]));
return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_ins<q>")]
)
(define_expand "signbit<mode>2"
[(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
(use (match_operand:VDQSF 1 "register_operand"))]
"TARGET_SIMD"
{
int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
shift_amount);
operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
shift_vector));
DONE;
})
(define_insn "aarch64_simd_lshr<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
"TARGET_SIMD"
"ushr\t%0.<Vtype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm<q>")]
)
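
;; Shifting right arithmetically by every value bit of the element leaves
;; 0 or -1 in each lane, which is exactly the result of a
;; compare-less-than-zero, so the first alternative (constraint D1) emits
;; CMLT instead of SSHR.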
(define_insn "aarch64_simd_ashr<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
(ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w")
(match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "D1,Dr")))]
"TARGET_SIMD"
"@
cmlt\t%0.<Vtype>, %1.<Vtype>, #0
sshr\t%0.<Vtype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
)
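
;; Shift right (arithmetic or logical, via the SHIFTRT iterator) and
;; accumulate, using a single SSRA or USRA; operand 3 is the accumulator
;; and is tied to the destination.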
(define_insn "*aarch64_simd_sra<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(plus:VDQ_I
(SHIFTRT:VDQ_I
(match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
(match_operand:VDQ_I 3 "register_operand" "0")))]
"TARGET_SIMD"
"<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_acc<q>")]
)
(define_insn "aarch64_simd_imm_shl<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
"TARGET_SIMD"
"shl\t%0.<Vtype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm<q>")]
)
(define_insn "aarch64_simd_reg_sshl<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "register_operand" "w")))]
"TARGET_SIMD"
"sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_shift_reg<q>")]
)
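
;; SSHL/USHL with a register count shift left for positive per-lane counts
;; and right for negative ones, behaviour that plain RTL shift codes cannot
;; express, so the general forms below are wrapped in unspecs.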
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "register_operand" "w")]
UNSPEC_ASHIFT_UNSIGNED))]
"TARGET_SIMD"
"ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_shift_reg<q>")]
)
(define_insn "aarch64_simd_reg_shl<mode>_signed"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "register_operand" "w")]
UNSPEC_ASHIFT_SIGNED))]
"TARGET_SIMD"
"sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_shift_reg<q>")]
)
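
;; Expanders for vector shifts by a scalar amount.  A constant in-range
;; count becomes an immediate-shift insn; otherwise the count is forced
;; into a register, duplicated across a vector, and one of the register
;; shift insns above is used.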
(define_expand "ashl<mode>3"
[(match_operand:VDQ_I 0 "register_operand")
(match_operand:VDQ_I 1 "register_operand")
(match_operand:SI 2 "general_operand")]
"TARGET_SIMD"
{
int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
int shift_amount;
if (CONST_INT_P (operands[2]))
{
shift_amount = INTVAL (operands[2]);
if (shift_amount >= 0 && shift_amount < bit_width)
{
rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
shift_amount);
emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
operands[1],
tmp));
DONE;
}
}
operands[2] = force_reg (SImode, operands[2]);
rtx tmp = gen_reg_rtx (<MODE>mode);
emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
operands[2],
0)));
emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
DONE;
})
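
;; Likewise for logical right shifts, except that a variable count must be
;; negated first: USHL shifts right when its per-lane count is negative.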
(define_expand "lshr<mode>3"
[(match_operand:VDQ_I 0 "register_operand")
(match_operand:VDQ_I 1 "register_operand")
(match_operand:SI 2 "general_operand")]
"TARGET_SIMD"
{
int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
int shift_amount;
if (CONST_INT_P (operands[2]))
{
shift_amount = INTVAL (operands[2]);
if (shift_amount > 0 && shift_amount <= bit_width)
{
rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
shift_amount);
emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
operands[1],
tmp));
DONE;
}
}
operands[2] = force_reg (SImode, operands[2]);
rtx tmp = gen_reg_rtx (SImode);
rtx tmp1 = gen_reg_rtx (<MODE>mode);
emit_insn (gen_negsi2 (tmp, operands[2]));
emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
convert_to_mode (<VEL>mode, tmp, 0)));
emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
tmp1));
DONE;
})
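
;; Arithmetic right shifts follow the same scheme, negating a variable
;; count and using SSHL.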
(define_expand "ashr<mode>3"
[(match_operand:VDQ_I 0 "register_operand")
(match_operand:VDQ_I 1 "register_operand")
(match_operand:SI 2 "general_operand")]
"TARGET_SIMD"
{
int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
int shift_amount;
if (CONST_INT_P (operands[2]))
{
shift_amount = INTVAL (operands[2]);
if (shift_amount > 0 && shift_amount <= bit_width)
{
rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
shift_amount);
emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
operands[1],
tmp));
DONE;
}
}
operands[2] = force_reg (SImode, operands[2]);
rtx tmp = gen_reg_rtx (SImode);
rtx tmp1 = gen_reg_rtx (<MODE>mode);
emit_insn (gen_negsi2 (tmp, operands[2]));
emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
tmp, 0)));
emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
tmp1));
DONE;
})
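;; A sketch of the two expansion paths above (illustrative only; exact
;; output depends on mode and register allocation): a constant shift such as
;;   int32x4_t f (int32x4_t x) { return x >> 3; }
;; duplicates the count into a constant vector and becomes a single
;;   sshr  v0.4s, v0.4s, 3
;; whereas a variable count is negated and duplicated, since SSHL shifts
;; right for negative shift elements, giving roughly
;;   neg   w1, w0
;;   dup   v1.4s, w1
;;   sshl  v0.4s, v0.4s, v1.4s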
(define_expand "vashl<mode>3"
[(match_operand:VDQ_I 0 "register_operand")
(match_operand:VDQ_I 1 "register_operand")
(match_operand:VDQ_I 2 "register_operand")]
"TARGET_SIMD"
{
emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
operands[2]));
DONE;
})
(define_expand "vashr<mode>3"
[(match_operand:VDQ_I 0 "register_operand")
(match_operand:VDQ_I 1 "register_operand")
(match_operand:VDQ_I 2 "register_operand")]
"TARGET_SIMD"
{
rtx neg = gen_reg_rtx (<MODE>mode);
emit (gen_neg<mode>2 (neg, operands[2]));
emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
neg));
DONE;
})
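;; The vashl/vashr expanders above rely on the fact that AdvSIMD has no
;; right-shift by register: SSHL/USHL shift left for positive shift
;; elements and right for negative ones, so an element-wise arithmetic
;; right shift is synthesised (roughly) as
;;   neg   v2.4s, v2.4s
;;   sshl  v0.4s, v1.4s, v2.4s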
;; DI vector shift
(define_expand "aarch64_ashr_simddi"
[(match_operand:DI 0 "register_operand")
(match_operand:DI 1 "register_operand")
(match_operand:SI 2 "aarch64_shift_imm64_di")]
"TARGET_SIMD"
{
  /* An arithmetic shift right by 64 fills the result with copies of the sign
     bit, just like an asr by 63; the standard pattern, however, does not
     handle a shift by 64.  */
if (INTVAL (operands[2]) == 64)
operands[2] = GEN_INT (63);
emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
DONE;
}
)
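;; For example, vshrd_n_s64 (x, 64), which I believe reaches this expander
;; via the ashr_simddi builtin, can be emitted as
;;   sshr  d0, d0, 63
;; since both shift amounts fill the result with the sign bit.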
(define_expand "vlshr<mode>3"
[(match_operand:VDQ_I 0 "register_operand")
(match_operand:VDQ_I 1 "register_operand")
(match_operand:VDQ_I 2 "register_operand")]
"TARGET_SIMD"
{
rtx neg = gen_reg_rtx (<MODE>mode);
emit (gen_neg<mode>2 (neg, operands[2]));
emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
neg));
DONE;
})
(define_expand "aarch64_lshr_simddi"
[(match_operand:DI 0 "register_operand")
(match_operand:DI 1 "register_operand")
(match_operand:SI 2 "aarch64_shift_imm64_di")]
"TARGET_SIMD"
{
if (INTVAL (operands[2]) == 64)
emit_move_insn (operands[0], const0_rtx);
else
emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
DONE;
}
)
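;; Similarly for the unsigned case: a logical shift right by 64 has a known
;; result of zero, so e.g. vshrd_n_u64 (x, 64) becomes a move of zero
;; rather than a shift.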
;; For 64-bit modes we use shl/ushr, as these do not require a SIMD zero.
(define_insn "vec_shr_<mode>"
[(set (match_operand:VD 0 "register_operand" "=w")
(unspec:VD [(match_operand:VD 1 "register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")]
UNSPEC_VEC_SHR))]
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
return "shl %d0, %d1, %2";
else
return "ushr %d0, %d1, %2";
}
[(set_attr "type" "neon_shift_imm")]
)
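;; The choice of mnemonic above follows element order in memory: on
;; little-endian that order matches increasing bit positions within the
;; 64-bit register, so USHR moves the elements; on big-endian the same
;; element movement corresponds to a left shift of the register, hence SHL.
;; The note about avoiding a SIMD zero presumably contrasts this with an
;; EXT-based sequence, which would consume a zeroed vector register.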
(define_expand "vec_set<mode>"
[(match_operand:VALL_F16 0 "register_operand")
(match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
(match_operand:SI 2 "immediate_operand")]
"TARGET_SIMD"
{
HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
GEN_INT (elem), operands[0]));
DONE;
}
)
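;; The lane number is turned into a one-hot mask because the underlying
;; aarch64_simd_vec_set<mode> pattern expresses the insertion as a
;; vec_merge, whose selector operand is a bitmask of lanes taken from the
;; duplicated scalar.  E.g. vsetq_lane_s32 (w, v, 2) passes the mask
;; 1 << 2 and assembles (roughly) to
;;   ins   v0.s[2], w0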
(define_insn "aarch64_mla<mode>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(plus:VDQ_BHSI (mult:VDQ_BHSI
(match_operand:VDQ_BHSI 2 "register_operand" "w")
(match_operand:VDQ_BHSI 3 "register_operand" "w"))
(match_operand:VDQ_BHSI 1 "register_operand" "0")))]
"TARGET_SIMD"
"mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
[(set_attr "type" "neon_mla_<Vetype><q>")]
)
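;; E.g. vmlaq_s32 (acc, a, b) matches this pattern and emits
;;   mla   v0.4s, v1.4s, v2.4s
;; with the accumulator tied to the destination by the "0" constraint.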
(define_insn "*aarch64_mla_elt<mode>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(plus:VDQHS
(mult:VDQHS
(vec_duplicate:VDQHS
(vec_select:<VEL>
(match_operand:VDQHS 1 "register_operand" "<h_con>")
(parallel [(match_operand:SI 2 "immediate_operand")])))
(match_operand:VDQHS 3 "register_operand" "w"))
(match_operand:VDQHS 4 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
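;; aarch64_endian_lane_rtx adjusts the lane number for endianness: GCC
;; numbers lanes in memory order, whereas the [index] operand syntax uses
;; architectural (register) numbering, and on big-endian targets the two
;; run in opposite directions.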
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(plus:VDQHS
(mult:VDQHS
(vec_duplicate:VDQHS
(vec_select:<VEL>
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
(parallel [(match_operand:SI 2 "immediate_operand")])))
(match_operand:VDQHS 3 "register_operand" "w"))
(match_operand:VDQHS 4 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
(define_insn "aarch64_mla_n<mode>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(plus:VDQHS
(mult:VDQHS
(vec_duplicate:VDQHS
(match_operand:<VEL> 3 "register_operand" "<h_con>"))
(match_operand:VDQHS 2 "register_operand" "w"))
(match_operand:VDQHS 1 "register_operand" "0")))]
"TARGET_SIMD"
"mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
AArch64 [3/10] 2012-10-23 Ian Bolton <ian.bolton@arm.com> James Greenhalgh <james.greenhalgh@arm.com> Jim MacArthur <jim.macarthur@arm.com> Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Marcus Shawcroft <marcus.shawcroft@arm.com> Nigel Stephens <nigel.stephens@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Richard Earnshaw <rearnsha@arm.com> Sofiane Naci <sofiane.naci@arm.com> Stephen Thomas <stephen.thomas@arm.com> Tejas Belagod <tejas.belagod@arm.com> Yufeng Zhang <yufeng.zhang@arm.com> * common/config/aarch64/aarch64-common.c: New file. * config/aarch64/aarch64-arches.def: New file. * config/aarch64/aarch64-builtins.c: New file. * config/aarch64/aarch64-cores.def: New file. * config/aarch64/aarch64-elf-raw.h: New file. * config/aarch64/aarch64-elf.h: New file. * config/aarch64/aarch64-generic.md: New file. * config/aarch64/aarch64-linux.h: New file. * config/aarch64/aarch64-modes.def: New file. * config/aarch64/aarch64-option-extensions.def: New file. * config/aarch64/aarch64-opts.h: New file. * config/aarch64/aarch64-protos.h: New file. * config/aarch64/aarch64-simd.md: New file. * config/aarch64/aarch64-tune.md: New file. * config/aarch64/aarch64.c: New file. * config/aarch64/aarch64.h: New file. * config/aarch64/aarch64.md: New file. * config/aarch64/aarch64.opt: New file. * config/aarch64/arm_neon.h: New file. * config/aarch64/constraints.md: New file. * config/aarch64/gentune.sh: New file. * config/aarch64/iterators.md: New file. * config/aarch64/large.md: New file. * config/aarch64/predicates.md: New file. * config/aarch64/small.md: New file. * config/aarch64/sync.md: New file. * config/aarch64/t-aarch64-linux: New file. * config/aarch64/t-aarch64: New file. Co-Authored-By: Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com> Co-Authored-By: Jim MacArthur <jim.macarthur@arm.com> Co-Authored-By: Marcus Shawcroft <marcus.shawcroft@arm.com> Co-Authored-By: Nigel Stephens <nigel.stephens@arm.com> Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Co-Authored-By: Richard Earnshaw <rearnsha@arm.com> Co-Authored-By: Sofiane Naci <sofiane.naci@arm.com> Co-Authored-By: Stephen Thomas <stephen.thomas@arm.com> Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com> Co-Authored-By: Yufeng Zhang <yufeng.zhang@arm.com> From-SVN: r192723
2012-10-23 19:02:30 +02:00
(define_insn "aarch64_mls<mode>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
(mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
(match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
"TARGET_SIMD"
"mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
[(set_attr "type" "neon_mla_<Vetype><q>")]
)
(define_insn "*aarch64_mls_elt<mode>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(minus:VDQHS
(match_operand:VDQHS 4 "register_operand" "0")
(mult:VDQHS
(vec_duplicate:VDQHS
(vec_select:<VEL>
(match_operand:VDQHS 1 "register_operand" "<h_con>")
(parallel [(match_operand:SI 2 "immediate_operand")])))
(match_operand:VDQHS 3 "register_operand" "w"))))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(minus:VDQHS
(match_operand:VDQHS 4 "register_operand" "0")
(mult:VDQHS
(vec_duplicate:VDQHS
(vec_select:<VEL>
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
(parallel [(match_operand:SI 2 "immediate_operand")])))
(match_operand:VDQHS 3 "register_operand" "w"))))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
(define_insn "aarch64_mls_n<mode>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(minus:VDQHS
(match_operand:VDQHS 1 "register_operand" "0")
(mult:VDQHS
(vec_duplicate:VDQHS
(match_operand:<VEL> 3 "register_operand" "<h_con>"))
(match_operand:VDQHS 2 "register_operand" "w"))))]
"TARGET_SIMD"
"mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
(match_operand:VDQ_BHSI 2 "register_operand" "w")))]
"TARGET_SIMD"
"<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_minmax<q>")]
)
(define_expand "<su><maxmin>v2di3"
[(set (match_operand:V2DI 0 "register_operand")
(MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
(match_operand:V2DI 2 "register_operand")))]
"TARGET_SIMD"
{
enum rtx_code cmp_operator;
rtx cmp_fmt;
switch (<CODE>)
{
case UMIN:
cmp_operator = LTU;
break;
case SMIN:
cmp_operator = LT;
break;
case UMAX:
cmp_operator = GTU;
break;
case SMAX:
cmp_operator = GT;
break;
default:
gcc_unreachable ();
}
cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
emit_insn (gen_vcondv2div2di (operands[0], operands[1],
operands[2], cmp_fmt, operands[1], operands[2]));
DONE;
})
;; Pairwise Integer Max/Min operations.
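;; These operate on adjacent pairs across the concatenation of the two
;; inputs, so that roughly:
;;
;;   smaxp (a, b) = { max (a[0], a[1]), max (a[2], a[3]), ...,
;;                    max (b[0], b[1]), max (b[2], b[3]), ... }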
(define_insn "aarch64_<optab>p<mode>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
(match_operand:VDQ_BHSI 2 "register_operand" "w")]
MAXMINV))]
"TARGET_SIMD"
"<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_minmax<q>")]
)
;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<optab>p<mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")]
FMAXMINV))]
"TARGET_SIMD"
"<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_minmax<q>")]
)
;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2. That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; This means that the RTL descriptions of the patterns below must
;; change depending on endianness.
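;; For example, the little-endian form of the XTN pattern below places
;; the truncated result in the low half of the vec_concat:
;;
;;   (vec_concat (truncate x) 0)	;; !BYTES_BIG_ENDIAN
;;   (vec_concat 0 (truncate x))	;; BYTES_BIG_ENDIAN
;;
;; while both forms emit the same "xtn" instruction.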
;; Narrowing operations.
(define_insn "aarch64_xtn<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
(match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"xtn\\t%0.<Vntype>, %1.<Vtype>"
[(set_attr "type" "neon_move_narrow_q")]
)
(define_insn "aarch64_xtn<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"xtn\\t%0.<Vntype>, %1.<Vtype>"
[(set_attr "type" "neon_move_narrow_q")]
)
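;; Writing a D register zeroes the upper 64 bits of the enclosing
;; Q register, which is why the insns above can describe the full
;; 128-bit result as a vec_concat with zero.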
(define_expand "aarch64_xtn<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand")
(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
"TARGET_SIMD"
{
rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
CONST0_RTX (<VNARROWQ>mode)));
else
emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
CONST0_RTX (<VNARROWQ>mode)));
/* The intrinsic expects a narrow result, so emit a subreg that will get
optimized away as appropriate. */
emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
<VNARROWQ2>mode));
DONE;
}
)
(define_insn "aarch64_xtn2<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 1 "register_operand" "0")
(truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"xtn2\t%0.<V2ntype>, %2.<Vtype>"
[(set_attr "type" "neon_move_narrow_q")]
)
(define_insn "aarch64_xtn2<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
(match_operand:<VNARROWQ> 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"xtn2\t%0.<V2ntype>, %2.<Vtype>"
[(set_attr "type" "neon_move_narrow_q")]
)
(define_expand "aarch64_xtn2<mode>"
[(match_operand:<VNARROWQ2> 0 "register_operand")
(match_operand:<VNARROWQ> 1 "register_operand")
(truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
operands[2]));
else
emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
operands[2]));
DONE;
}
)
(define_insn "*aarch64_narrow_trunc<mode>"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(truncate:<VNARROWQ>
(match_operand:VQN 1 "register_operand" "w"))
(truncate:<VNARROWQ>
(match_operand:VQN 2 "register_operand" "w"))))]
"TARGET_SIMD"
{
if (!BYTES_BIG_ENDIAN)
return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
else
return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
}
[(set_attr "type" "neon_permute<q>")]
)
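;; On the narrow view of the registers, truncation keeps the low half
;; of every wide element, i.e. the even-indexed narrow subelements (on
;; little-endian).  Truncating both inputs and concatenating the
;; results is therefore a single UZP1; the operands are swapped for
;; big-endian to match the vec_concat ordering.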
;; Packing doubles.
(define_expand "vec_pack_trunc_<mode>"
[(match_operand:<VNARROWD> 0 "register_operand")
(match_operand:VDN 1 "general_operand")
(match_operand:VDN 2 "general_operand")]
"TARGET_SIMD"
{
rtx tempreg = gen_reg_rtx (<VDBL>mode);
emit_insn (gen_aarch64_vec_concat<mode> (tempreg, operands[1], operands[2]));
emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
DONE;
})
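;; There is no D-register narrowing instruction, so the two 64-bit
;; inputs are first concatenated into a Q register and then truncated
;; in one go.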
;; Packing quads.
(define_expand "vec_pack_trunc_<mode>"
[(set (match_operand:<VNARROWQ2> 0 "register_operand")
(vec_concat:<VNARROWQ2>
(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
(truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
"TARGET_SIMD"
{
rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
int lo = BYTES_BIG_ENDIAN ? 2 : 1;
int hi = BYTES_BIG_ENDIAN ? 1 : 2;
emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
operands[hi]));
else
emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
operands[hi]));
DONE;
}
)
(define_insn "aarch64_shrn<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(truncate:<VNARROWQ>
(lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
(match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_insn "aarch64_shrn<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
(truncate:<VNARROWQ>
(lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
(match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_insn "*aarch64_<srn_op>shrn<mode>_vect"
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
(truncate:<VNARROWQ>
(SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
(match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
"TARGET_SIMD"
"shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_insn "*aarch64_<srn_op>shrn<mode>2_vect_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 1 "register_operand" "0")
(truncate:<VNARROWQ>
(SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
(match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_insn "*aarch64_<srn_op>shrn<mode>2_vect_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(truncate:<VNARROWQ>
(SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
(match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
(match_operand:<VNARROWQ> 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
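;; A truncating right shift by exactly half the width of the original
;; elements just selects the top half of each element, so a hi/lo pair
;; of such shift-narrows can be done with a single UZP2 (which takes
;; the odd-indexed narrow subelements of the two inputs).  For example,
;; for
;;
;;   d[i] = (a[i] * a[i]) >> 16
;;
;; on 16-bit inputs, the two shifts and two narrows after the widening
;; multiplies become one uzp2.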
(define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(truncate:<VNARROWQ>
(SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
(match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
(truncate:<VNARROWQ>
(SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
(match_dup 2)))))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
[(set_attr "type" "neon_permute<q>")]
)
(define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(truncate:<VNARROWQ>
(SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
(match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
(truncate:<VNARROWQ>
(SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
(match_dup 2)))))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
[(set_attr "type" "neon_permute<q>")]
)
(define_expand "aarch64_shrn<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand")
(truncate:<VNARROWQ>
(lshiftrt:VQN (match_operand:VQN 1 "register_operand")
(match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
"TARGET_SIMD"
{
operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
INTVAL (operands[2]));
rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_shrn<mode>_insn_be (tmp, operands[1],
operands[2], CONST0_RTX (<VNARROWQ>mode)));
else
emit_insn (gen_aarch64_shrn<mode>_insn_le (tmp, operands[1],
operands[2], CONST0_RTX (<VNARROWQ>mode)));
/* The intrinsic expects a narrow result, so emit a subreg that will get
optimized away as appropriate. */
emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
<VNARROWQ2>mode));
DONE;
}
)
(define_insn "aarch64_rshrn<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
(match_operand:VQN 2
"aarch64_simd_shift_imm_vec_<vn_mode>")] UNSPEC_RSHRN)
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_insn "aarch64_rshrn<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
(match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
UNSPEC_RSHRN)))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
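;; A rounding shift right by the full width of the narrow elements is
;; equivalent to RADDHN with a zero second operand:
;;
;;   raddhn (x, 0) = (x + 0 + (1 << (n - 1))) >> n = rshrn (x, n)
;;
;; where n is the narrow element width.  The expanders below use this
;; (and RADDHN2 for the high half) so that the zero can be shared via
;; aarch64_gen_shareable_zero.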
(define_expand "aarch64_rshrn<mode>"
[(match_operand:<VNARROWQ> 0 "register_operand")
(match_operand:VQN 1 "register_operand")
(match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
"TARGET_SIMD"
{
if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
{
rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
}
else
{
rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
INTVAL (operands[2]));
if (BYTES_BIG_ENDIAN)
emit_insn (
gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
operands[2],
CONST0_RTX (<VNARROWQ>mode)));
else
emit_insn (
gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
operands[2],
CONST0_RTX (<VNARROWQ>mode)));
/* The intrinsic expects a narrow result, so emit a subreg that will
get optimized away as appropriate. */
emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
<VNARROWQ2>mode));
}
DONE;
}
)
(define_insn "aarch64_shrn2<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 1 "register_operand" "0")
(truncate:<VNARROWQ>
(lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
(match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_insn "aarch64_shrn2<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(truncate:<VNARROWQ>
(lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
(match_operand:VQN 3
"aarch64_simd_shift_imm_vec_<vn_mode>")))
(match_operand:<VNARROWQ> 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_expand "aarch64_shrn2<mode>"
[(match_operand:<VNARROWQ2> 0 "register_operand")
(match_operand:<VNARROWQ> 1 "register_operand")
(match_operand:VQN 2 "register_operand")
(match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
"TARGET_SIMD"
{
operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
INTVAL (operands[3]));
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_shrn2<mode>_insn_be (operands[0], operands[1],
operands[2], operands[3]));
else
emit_insn (gen_aarch64_shrn2<mode>_insn_le (operands[0], operands[1],
operands[2], operands[3]));
DONE;
}
)
(define_insn "aarch64_rshrn2<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 1 "register_operand" "0")
(unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
(match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
UNSPEC_RSHRN)))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_insn "aarch64_rshrn2<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
(match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
UNSPEC_RSHRN)
(match_operand:<VNARROWQ> 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_expand "aarch64_rshrn2<mode>"
[(match_operand:<VNARROWQ2> 0 "register_operand")
(match_operand:<VNARROWQ> 1 "register_operand")
(match_operand:VQN 2 "register_operand")
(match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
"TARGET_SIMD"
{
if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ2>mode))
{
rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
operands[2], tmp));
}
else
{
operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
INTVAL (operands[3]));
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_rshrn2<mode>_insn_be (operands[0],
operands[1],
operands[2],
operands[3]));
else
emit_insn (gen_aarch64_rshrn2<mode>_insn_le (operands[0],
operands[1],
operands[2],
operands[3]));
}
DONE;
}
)
;; Widening operations.
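;; SXTL/UXTL (and SXTL2/UXTL2) are aliases of SSHLL/USHLL with a zero
;; shift amount, hence the neon_shift_imm_long scheduling type below.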
(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 1 "register_operand" "w")
(match_operand:VQW 2 "vect_par_cnst_lo_half" "")
)))]
"TARGET_SIMD"
"<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
[(set_attr "type" "neon_shift_imm_long")]
)
(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 1 "register_operand" "w")
(match_operand:VQW 2 "vect_par_cnst_hi_half" "")
)))]
"TARGET_SIMD"
"<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
[(set_attr "type" "neon_shift_imm_long")]
)
(define_expand "vec_unpack<su>_hi_<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
operands[1], p));
DONE;
}
)
(define_expand "vec_unpack<su>_lo_<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
operands[1], p));
DONE;
}
)
;; Widening arithmetic.
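;; SMLAL/UMLAL and friends widen each input element, multiply and
;; accumulate, roughly:
;;
;;   acc[i] += (wide) a[i] * (wide) b[i]
;;
;; with the lo patterns reading the low halves of the Q-register
;; inputs and the hi ("2") patterns the high halves.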
(define_insn "*aarch64_<su>mlal_lo<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 2 "register_operand" "w")
(match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 4 "register_operand" "w")
(match_dup 3))))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
"<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
[(set_attr "type" "neon_mla_<Vetype>_long")]
)
(define_insn "aarch64_<su>mlal_hi<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 2 "register_operand" "w")
(match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 4 "register_operand" "w")
(match_dup 3))))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
"<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
[(set_attr "type" "neon_mla_<Vetype>_long")]
)
(define_expand "aarch64_<su>mlal_hi<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:<VWIDE> 1 "register_operand")
(ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
(match_operand:VQW 3 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
operands[2], p, operands[3]));
DONE;
}
)
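;; In the _hi expanders, the vec_select parallel for the high half is
;; built at expand time by aarch64_simd_vect_par_cnst_half.  As an
;; illustrative sketch, for V8HImode with high == true on little-endian
;; it produces
;;
;;   (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])
;;
;; (on big-endian the two halves are swapped to preserve architectural
;; lane numbering), and vect_par_cnst_hi_half accepts this when the
;; generated insn is matched.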
(define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(match_operand:<VEL> 4 "register_operand" "<h_con>"))))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
"<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
[(set_attr "type" "neon_mla_<Vetype>_long")]
)
(define_expand "aarch64_<su>mlal_hi_n<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:<VWIDE> 1 "register_operand")
(ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
(match_operand:<VEL> 3 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
operands[1], operands[2], p, operands[3]));
DONE;
}
)
(define_insn "*aarch64_<su>mlsl_lo<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 2 "register_operand" "w")
(match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 4 "register_operand" "w")
(match_dup 3))))))]
"TARGET_SIMD"
"<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
[(set_attr "type" "neon_mla_<Vetype>_long")]
)
(define_insn "aarch64_<su>mlsl_hi<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 2 "register_operand" "w")
(match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 4 "register_operand" "w")
(match_dup 3))))))]
"TARGET_SIMD"
"<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
[(set_attr "type" "neon_mla_<Vetype>_long")]
)
(define_expand "aarch64_<su>mlsl_hi<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:<VWIDE> 1 "register_operand")
(ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
(match_operand:VQW 3 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
operands[2], p, operands[3]));
DONE;
}
)
(define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
"TARGET_SIMD"
"<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
[(set_attr "type" "neon_mla_<Vetype>_long")]
)
(define_expand "aarch64_<su>mlsl_hi_n<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:<VWIDE> 1 "register_operand")
(ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
(match_operand:<VEL> 3 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
operands[1], operands[2], p, operands[3]));
DONE;
}
)
(define_insn "aarch64_<su>mlal<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(match_operand:VD_BHSI 2 "register_operand" "w"))
(ANY_EXTEND:<VWIDE>
(match_operand:VD_BHSI 3 "register_operand" "w")))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
"<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
[(set_attr "type" "neon_mla_<Vetype>_long")]
)
(define_insn "aarch64_<su>mlal_n<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(match_operand:<VEL> 3 "register_operand" "<h_con>"))))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
"<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_mla_<Vetype>_long")]
)
(define_insn "aarch64_<su>mlsl<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(match_operand:VD_BHSI 2 "register_operand" "w"))
(ANY_EXTEND:<VWIDE>
(match_operand:VD_BHSI 3 "register_operand" "w")))))]
"TARGET_SIMD"
"<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
[(set_attr "type" "neon_mla_<Vetype>_long")]
)
(define_insn "aarch64_<su>mlsl_n<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
"TARGET_SIMD"
"<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_mla_<Vetype>_long")]
)
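;; The 64-bit (D-register) forms above need no half selection and map
;; directly onto the arm_neon.h multiply-accumulate intrinsics.  A
;; typical use (illustrative C, not part of this file):
;;
;;   #include <arm_neon.h>
;;   int32x4_t f (int32x4_t acc, int16x4_t a, int16_t b)
;;   {
;;     return vmlal_n_s16 (acc, a, b);  /* acc[i] += (int32_t) a[i] * b */
;;   }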
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 1 "register_operand" "w")
(match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 2 "register_operand" "w")
(match_dup 3)))))]
"TARGET_SIMD"
"<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
[(set_attr "type" "neon_mul_<Vetype>_long")]
)
(define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE> (ANY_EXTEND:<VWIDE>
(match_operand:VD_BHSI 1 "register_operand" "w"))
(ANY_EXTEND:<VWIDE>
(match_operand:VD_BHSI 2 "register_operand" "w"))))]
"TARGET_SIMD"
"<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_mul_<Vetype>_long")]
)
(define_expand "vec_widen_<su>mult_lo_<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
(ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
operands[1],
operands[2], p));
DONE;
}
)
(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 1 "register_operand" "w")
(match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 2 "register_operand" "w")
(match_dup 3)))))]
"TARGET_SIMD"
"<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_mul_<Vetype>_long")]
)
(define_expand "vec_widen_<su>mult_hi_<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
(ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
operands[1],
operands[2], p));
DONE;
}
)
;; vmull_lane_s16 intrinsics
(define_insn "aarch64_vec_<su>mult_lane<Qlane>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(match_operand:<VCOND> 1 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(vec_select:<VEL>
(match_operand:VDQHS 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
"TARGET_SIMD"
{
operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)
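;; The lane index taken from the intrinsic is remapped at output time by
;; aarch64_endian_lane_rtx so that the same architectural lane is
;; selected on either endianness.  Roughly, assuming the helper follows
;; ENDIAN_LANE_N semantics (a sketch, not the actual implementation):
;;
;;   lane = BYTES_BIG_ENDIAN ? nunits - 1 - lane : lane;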
(define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 1 "register_operand" "w")
(match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)
(define_expand "aarch64_<su>mull_hi_lane<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
(match_operand:<VCOND> 2 "register_operand")
(match_operand:SI 3 "immediate_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
operands[1], p, operands[2], operands[3]));
DONE;
}
)
(define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 1 "register_operand" "w")
(match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)
(define_expand "aarch64_<su>mull_hi_laneq<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
(match_operand:<VCONQ> 2 "register_operand")
(match_operand:SI 3 "immediate_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
operands[1], p, operands[2], operands[3]));
DONE;
}
)
(define_insn "aarch64_<su>mull_n<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(match_operand:VD_HSI 1 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
"TARGET_SIMD"
"<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
[(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)
(define_insn "aarch64_<su>mull_hi_n<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 1 "register_operand" "w")
(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
"TARGET_SIMD"
"<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
[(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)
(define_expand "aarch64_<su>mull_hi_n<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
(match_operand:<VEL> 2 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
operands[2], p));
DONE;
}
)
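;; The _hi_n expanders combine high-half selection with a scalar
;; broadcast.  For example (illustrative C, not part of this file),
;; vmull_high_n_s16 (a, b) widens lanes 4..7 of the 8x16-bit input a
;; and multiplies each by the scalar b:
;;
;;   int32x4_t r = vmull_high_n_s16 (a, b);  /* r[i] = (int32_t) a[4+i] * b */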
;; vmlal_lane_s16 intrinsics
(define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(match_operand:<VCOND> 2 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(vec_select:<VEL>
(match_operand:VDQHS 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)
(define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCOND> 4 "register_operand" "<vwx>")
(parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)
(define_expand "aarch64_<su>mlal_hi_lane<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:<VWIDE> 1 "register_operand")
(ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
(match_operand:<VCOND> 3 "register_operand")
(match_operand:SI 4 "immediate_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
operands[1], operands[2], p, operands[3], operands[4]));
DONE;
}
)
(define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCONQ> 4 "register_operand" "<vwx>")
(parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)
(define_expand "aarch64_<su>mlal_hi_laneq<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:<VWIDE> 1 "register_operand")
(ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
(match_operand:<VCONQ> 3 "register_operand")
(match_operand:SI 4 "immediate_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
operands[1], operands[2], p, operands[3], operands[4]));
DONE;
}
)
(define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(match_operand:<VCOND> 2 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(vec_select:<VEL>
(match_operand:VDQHS 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)
(define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCOND> 4 "register_operand" "<vwx>")
(parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
)))]
"TARGET_SIMD"
{
operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)
(define_expand "aarch64_<su>mlsl_hi_lane<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:<VWIDE> 1 "register_operand")
(ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
(match_operand:<VCOND> 3 "register_operand")
(match_operand:SI 4 "immediate_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
operands[1], operands[2], p, operands[3], operands[4]));
DONE;
}
)
(define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(ANY_EXTEND:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCONQ> 4 "register_operand" "<vwx>")
(parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
)))]
"TARGET_SIMD"
{
operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
}
[(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)
(define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:<VWIDE> 1 "register_operand")
(ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
(match_operand:<VCONQ> 3 "register_operand")
(match_operand:SI 4 "immediate_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
operands[1], operands[2], p, operands[3], operands[4]));
DONE;
}
)
;; FP vector operations.
;; AArch64 AdvSIMD supports single-precision (32-bit) and
;; double-precision (64-bit) floating-point data types and arithmetic as
;; defined by the IEEE 754-2008 standard. This makes them vectorisable
;; without the need for -ffast-math or -funsafe-math-optimizations.
;;
;; Floating-point operations can raise an exception. Vectorising such
;; operations is safe for the reasons explained below.
;;
;; ARMv8 permits an extension to enable trapped floating-point
;; exception handling; however, this is an optional feature. In the
;; event of a floating-point exception being raised by vectorised
;; code then:
;; 1. If trapped floating-point exceptions are available, then a trap
;; will be taken when any lane raises an enabled exception. A trap
;; handler may determine which lane raised the exception.
;; 2. Alternatively a sticky exception flag is set in the
;; floating-point status register (FPSR). Software may explicitly
;; test the exception flags; such tests either occur inside the
;; vectorisable region, in which case they prevent vectorisation and
;; thereby allow precise identification of the failing operation, or
;; they occur outside it, in which case the specific operation and
;; lane are not of interest.
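;; For example (an illustrative C sketch, not from this file), a flag
;; test placed after the loop leaves the loop itself vectorisable:
;;
;;   #include <fenv.h>
;;   feclearexcept (FE_ALL_EXCEPT);
;;   for (int i = 0; i < n; i++)
;;     c[i] = a[i] + b[i];            /* may vectorise to FADD.  */
;;   if (fetestexcept (FE_INVALID))   /* sticky FPSR flags tested once,   */
;;     handle_fp_error ();            /* after the region (hypothetical   */
;;                                    /* handler).  */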
;; FP arithmetic operations.
(define_insn "add<mode>3"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")))]
AArch64 [3/10] 2012-10-23 Ian Bolton <ian.bolton@arm.com> James Greenhalgh <james.greenhalgh@arm.com> Jim MacArthur <jim.macarthur@arm.com> Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Marcus Shawcroft <marcus.shawcroft@arm.com> Nigel Stephens <nigel.stephens@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Richard Earnshaw <rearnsha@arm.com> Sofiane Naci <sofiane.naci@arm.com> Stephen Thomas <stephen.thomas@arm.com> Tejas Belagod <tejas.belagod@arm.com> Yufeng Zhang <yufeng.zhang@arm.com> * common/config/aarch64/aarch64-common.c: New file. * config/aarch64/aarch64-arches.def: New file. * config/aarch64/aarch64-builtins.c: New file. * config/aarch64/aarch64-cores.def: New file. * config/aarch64/aarch64-elf-raw.h: New file. * config/aarch64/aarch64-elf.h: New file. * config/aarch64/aarch64-generic.md: New file. * config/aarch64/aarch64-linux.h: New file. * config/aarch64/aarch64-modes.def: New file. * config/aarch64/aarch64-option-extensions.def: New file. * config/aarch64/aarch64-opts.h: New file. * config/aarch64/aarch64-protos.h: New file. * config/aarch64/aarch64-simd.md: New file. * config/aarch64/aarch64-tune.md: New file. * config/aarch64/aarch64.c: New file. * config/aarch64/aarch64.h: New file. * config/aarch64/aarch64.md: New file. * config/aarch64/aarch64.opt: New file. * config/aarch64/arm_neon.h: New file. * config/aarch64/constraints.md: New file. * config/aarch64/gentune.sh: New file. * config/aarch64/iterators.md: New file. * config/aarch64/large.md: New file. * config/aarch64/predicates.md: New file. * config/aarch64/small.md: New file. * config/aarch64/sync.md: New file. * config/aarch64/t-aarch64-linux: New file. * config/aarch64/t-aarch64: New file. Co-Authored-By: Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com> Co-Authored-By: Jim MacArthur <jim.macarthur@arm.com> Co-Authored-By: Marcus Shawcroft <marcus.shawcroft@arm.com> Co-Authored-By: Nigel Stephens <nigel.stephens@arm.com> Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Co-Authored-By: Richard Earnshaw <rearnsha@arm.com> Co-Authored-By: Sofiane Naci <sofiane.naci@arm.com> Co-Authored-By: Stephen Thomas <stephen.thomas@arm.com> Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com> Co-Authored-By: Yufeng Zhang <yufeng.zhang@arm.com> From-SVN: r192723
2012-10-23 19:02:30 +02:00
"TARGET_SIMD"
"fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_fp_addsub_<stype><q>")]
)
(define_insn "sub<mode>3"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")))]
"TARGET_SIMD"
"fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "mul<mode>3"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")))]
"TARGET_SIMD"
"fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_fp_mul_<stype><q>")]
)
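
;; Division is routed through the "div<mode>3" expander so that
;; aarch64_emit_approx_div can emit an approximate reciprocal sequence
;; where that is enabled; when it returns false (it always does for the
;; HF modes) the plain FDIV insn "*div<mode>3" below is matched instead.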
(define_expand "div<mode>3"
[(set (match_operand:VHSDF 0 "register_operand")
(div:VHSDF (match_operand:VHSDF 1 "register_operand")
(match_operand:VHSDF 2 "register_operand")))]
"TARGET_SIMD"
{
if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
DONE;
operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "*div<mode>3"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")))]
"TARGET_SIMD"
"fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_fp_div_<stype><q>")]
)
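
;; Lane-wise floating-point negate and absolute value; each maps
;; directly to a single FNEG / FABS instruction.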
(define_insn "neg<mode>2"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD"
"fneg\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_neg_<stype><q>")]
)

(define_insn "abs<mode>2"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD"
"fabs\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_abs_<stype><q>")]
)
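
;; Non-fused floating-point multiply-accumulate.  The aarch64_float_mla*
;; and aarch64_float_mls* expanders below compute, lane-wise,
;;   op0 = op1 + op2 * op3	(mla)
;;   op0 = op1 - op2 * op3	(mls)
;; as two instructions (a multiply into a scratch register followed by
;; an add or subtract), so the intermediate product is rounded rather
;; than fused.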
(define_expand "aarch64_float_mla<mode>"
[(set (match_operand:VDQF_DF 0 "register_operand")
(plus:VDQF_DF
(mult:VDQF_DF
(match_operand:VDQF_DF 2 "register_operand")
(match_operand:VDQF_DF 3 "register_operand"))
(match_operand:VDQF_DF 1 "register_operand")))]
"TARGET_SIMD"
{
rtx scratch = gen_reg_rtx (<MODE>mode);
emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
DONE;
}
)
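
;; As a usage sketch: an arm_neon.h call such as vmlaq_f32 (a, b, c) is
;; expected to reach aarch64_float_mlav4sf (assuming the usual builtin
;; mapping) and so produce FMUL + FADD rather than a fused FMLA.
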
(define_expand "aarch64_float_mls<mode>"
[(set (match_operand:VDQF_DF 0 "register_operand")
(minus:VDQF_DF
(match_operand:VDQF_DF 1 "register_operand")
(mult:VDQF_DF
(match_operand:VDQF_DF 2 "register_operand")
(match_operand:VDQF_DF 3 "register_operand"))))]
"TARGET_SIMD"
{
rtx scratch = gen_reg_rtx (<MODE>mode);
emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
DONE;
}
)
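
;; The _n variants take a scalar as the final operand and broadcast it
;; (vec_duplicate) across the vector before the multiply:
;;   op0 = op1 +/- op2 * dup(op3).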
(define_expand "aarch64_float_mla_n<mode>"
[(set (match_operand:VDQSF 0 "register_operand")
(plus:VDQSF
(mult:VDQSF
(vec_duplicate:VDQSF
(match_operand:<VEL> 3 "register_operand"))
(match_operand:VDQSF 2 "register_operand"))
(match_operand:VDQSF 1 "register_operand")))]
"TARGET_SIMD"
{
rtx scratch = gen_reg_rtx (<MODE>mode);
emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
DONE;
}
)

(define_expand "aarch64_float_mls_n<mode>"
[(set (match_operand:VDQSF 0 "register_operand")
(minus:VDQSF
(match_operand:VDQSF 1 "register_operand")
(mult:VDQSF
(vec_duplicate:VDQSF
(match_operand:<VEL> 3 "register_operand"))
(match_operand:VDQSF 2 "register_operand"))))]
"TARGET_SIMD"
{
rtx scratch = gen_reg_rtx (<MODE>mode);
emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
DONE;
}
)
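
;; The _lane variants multiply by a single element of a 64-bit V2SF
;; vector: operand 3 is the vector, operand 4 the constant lane index,
;; and the selected element is broadcast before the multiply.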
(define_expand "aarch64_float_mla_lane<mode>"
[(set (match_operand:VDQSF 0 "register_operand")
(plus:VDQSF
(mult:VDQSF
(vec_duplicate:VDQSF
(vec_select:<VEL>
(match_operand:V2SF 3 "register_operand")
(parallel [(match_operand:SI 4 "immediate_operand")])))
(match_operand:VDQSF 2 "register_operand"))
(match_operand:VDQSF 1 "register_operand")))]
"TARGET_SIMD"
{
rtx scratch = gen_reg_rtx (<MODE>mode);
emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
operands[3], operands[4]));
emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
DONE;
}
)

(define_expand "aarch64_float_mls_lane<mode>"
[(set (match_operand:VDQSF 0 "register_operand")
(minus:VDQSF
(match_operand:VDQSF 1 "register_operand")
(mult:VDQSF
(vec_duplicate:VDQSF
(vec_select:<VEL>
(match_operand:V2SF 3 "register_operand")
(parallel [(match_operand:SI 4 "immediate_operand")])))
(match_operand:VDQSF 2 "register_operand"))))]
"TARGET_SIMD"
{
rtx scratch = gen_reg_rtx (<MODE>mode);
emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
operands[3], operands[4]));
emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
DONE;
}
)
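
;; The _laneq variants are identical except that the lane is selected
;; from a 128-bit V4SF vector.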
(define_expand "aarch64_float_mla_laneq<mode>"
[(set (match_operand:VDQSF 0 "register_operand")
(plus:VDQSF
(mult:VDQSF
(vec_duplicate:VDQSF
(vec_select:<VEL>
(match_operand:V4SF 3 "register_operand")
(parallel [(match_operand:SI 4 "immediate_operand")])))
(match_operand:VDQSF 2 "register_operand"))
(match_operand:VDQSF 1 "register_operand")))]
"TARGET_SIMD"
{
rtx scratch = gen_reg_rtx (<MODE>mode);
emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
operands[3], operands[4]));
emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
DONE;
}
)

(define_expand "aarch64_float_mls_laneq<mode>"
[(set (match_operand:VDQSF 0 "register_operand")
(minus:VDQSF
(match_operand:VDQSF 1 "register_operand")
(mult:VDQSF
(vec_duplicate:VDQSF
(vec_select:<VEL>
(match_operand:V4SF 3 "register_operand")
(parallel [(match_operand:SI 4 "immediate_operand")])))
(match_operand:VDQSF 2 "register_operand"))))]
"TARGET_SIMD"
{
rtx scratch = gen_reg_rtx (<MODE>mode);
emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
operands[3], operands[4]));
emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
DONE;
}
)
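
;; "fma<mode>4" is the fused multiply-add named pattern (the fma optab):
;; a single FMLA, with the accumulator tied to the destination register
;; by the "0" constraint on operand 3.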
(define_insn "fma<mode>4"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")
(match_operand:VHSDF 3 "register_operand" "0")))]
"TARGET_SIMD"
"fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_fp_mla_<stype><q>")]
)
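
;; The *aarch64_fma4_elt* patterns below fuse a multiply by a single
;; vector element.  The lane index in operand 2 is remapped with
;; aarch64_endian_lane_rtx before printing, since GCC's vec_select lane
;; numbering is reversed relative to the architectural numbering on
;; big-endian targets.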
(define_insn "*aarch64_fma4_elt<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w")
(fma:VDQF
(vec_duplicate:VDQF
(vec_select:<VEL>
(match_operand:VDQF 1 "register_operand" "<h_con>")
(parallel [(match_operand:SI 2 "immediate_operand")])))
(match_operand:VDQF 3 "register_operand" "w")
(match_operand:VDQF 4 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(fma:VDQSF
(vec_duplicate:VDQSF
(vec_select:<VEL>
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
(parallel [(match_operand:SI 2 "immediate_operand")])))
(match_operand:VDQSF 3 "register_operand" "w")
(match_operand:VDQSF 4 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
(define_insn "*aarch64_fma4_elt_from_dup<mode>"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(fma:VMUL
(vec_duplicate:VMUL
(match_operand:<VEL> 1 "register_operand" "<h_con>"))
(match_operand:VMUL 2 "register_operand" "w")
(match_operand:VMUL 3 "register_operand" "0")))]
"TARGET_SIMD"
"fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
[(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)
(define_insn "*aarch64_fma4_elt_to_64v2df"
[(set (match_operand:DF 0 "register_operand" "=w")
(fma:DF
(vec_select:DF
(match_operand:V2DF 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand")]))
(match_operand:DF 3 "register_operand" "w")
(match_operand:DF 4 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
}
[(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
(define_insn "fnma<mode>4"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(fma:VHSDF
(neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
(match_operand:VHSDF 2 "register_operand" "w")
(match_operand:VHSDF 3 "register_operand" "0")))]
"TARGET_SIMD"
"fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_fp_mla_<stype><q>")]
)
(define_insn "*aarch64_fnma4_elt<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w")
(fma:VDQF
(neg:VDQF
(match_operand:VDQF 3 "register_operand" "w"))
(vec_duplicate:VDQF
(vec_select:<VEL>
(match_operand:VDQF 1 "register_operand" "<h_con>")
(parallel [(match_operand:SI 2 "immediate_operand")])))
(match_operand:VDQF 4 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(fma:VDQSF
(neg:VDQSF
(match_operand:VDQSF 3 "register_operand" "w"))
(vec_duplicate:VDQSF
(vec_select:<VEL>
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
(parallel [(match_operand:SI 2 "immediate_operand")])))
(match_operand:VDQSF 4 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
[(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(fma:VMUL
(neg:VMUL
(match_operand:VMUL 2 "register_operand" "w"))
(vec_duplicate:VMUL
(match_operand:<VEL> 1 "register_operand" "<h_con>"))
(match_operand:VMUL 3 "register_operand" "0")))]
"TARGET_SIMD"
"fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
[(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)
(define_insn "*aarch64_fnma4_elt_to_64v2df"
[(set (match_operand:DF 0 "register_operand" "=w")
(fma:DF
(vec_select:DF
(match_operand:V2DF 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand")]))
(neg:DF
(match_operand:DF 3 "register_operand" "w"))
(match_operand:DF 4 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
}
[(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
FRINT))]
"TARGET_SIMD"
"frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_round_<stype><q>")]
)
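;; As an illustrative sketch (not part of the pattern definitions), a
;; simple rounding loop such as
;;
;;   void f (float *restrict a, float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       a[i] = __builtin_truncf (b[i]);
;;   }
;;
;; can be auto-vectorized through the btrunc variant of the pattern
;; above, and for V4SF is expected to emit "frintz v0.4s, v1.4s".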
;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor.
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
[(match_operand:VHSDF 1 "register_operand" "w")]
FCVT)))]
"TARGET_SIMD"
"fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_to_int_<stype><q>")]
)
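;; Illustrative only: a float-to-int conversion that goes through an
;; explicit rounding function, for example
;;
;;   int f (float x) { return (int) __builtin_ceilf (x); }
;;
;; can, under suitable math options, be recognized as lceil; vectorized
;; over V4SF it is expected to use the pattern above and emit
;; "fcvtps v0.4s, v1.4s".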
;; HF scalar variants of the related SIMD instructions.
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
[(set (match_operand:HI 0 "register_operand" "=w")
(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
FCVT)))]
"TARGET_SIMD_F16INST"
"fcvt<frint_suffix><su>\t%h0, %h1"
[(set_attr "type" "neon_fp_to_int_s")]
)
(define_insn "<optab>_trunchfhi2"
[(set (match_operand:HI 0 "register_operand" "=w")
(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
"TARGET_SIMD_F16INST"
"fcvtz<su>\t%h0, %h1"
[(set_attr "type" "neon_fp_to_int_s")]
)
(define_insn "<optab>hihf2"
[(set (match_operand:HF 0 "register_operand" "=w")
(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
"TARGET_SIMD_F16INST"
"<su_optab>cvtf\t%h0, %h1"
[(set_attr "type" "neon_int_to_fp_s")]
)
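;; A minimal usage sketch for the scalar HF patterns, assuming the
;; arm_fp16.h intrinsics and -march=armv8.2-a+fp16:
;;
;;   #include <arm_fp16.h>
;;   int16_t f (float16_t x) { return vcvth_s16_f16 (x); }
;;
;; is expected to go through the <optab>_trunchfhi2 pattern above and
;; emit "fcvtzs h0, h0".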
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
[(mult:VDQF
(match_operand:VDQF 1 "register_operand" "w")
(match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
UNSPEC_FRINTZ)))]
"TARGET_SIMD
&& IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
{
int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
char buf[64];
snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
output_asm_insn (buf, operands);
return "";
}
[(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)
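;; An illustrative sketch of what the *_mult pattern above catches: a
;; conversion of a float scaled by a power of two, e.g.
;;
;;   void f (int *restrict r, float *restrict x, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       r[i] = (int) (x[i] * 256.0f);
;;   }
;;
;; can be combined into a single fixed-point conversion, emitting
;; "fcvtzs v0.4s, v1.4s, #8", since 256.0 is 2^8 and 8 is within the
;; element bit size.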
(define_expand "<optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
[(match_operand:VHSDF 1 "register_operand")]
UNSPEC_FRINTZ)))]
"TARGET_SIMD"
{})
(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
[(set (match_operand:<FCVT_TARGET> 0 "register_operand")
(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
[(match_operand:VHSDF 1 "register_operand")]
UNSPEC_FRINTZ)))]
"TARGET_SIMD"
{})
(define_expand "ftrunc<VHSDF:mode>2"
[(set (match_operand:VHSDF 0 "register_operand")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
UNSPEC_FRINTZ))]
"TARGET_SIMD"
{})
(define_insn "<optab><fcvt_target><VHSDF:mode>2"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(FLOATUORS:VHSDF
(match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
"TARGET_SIMD"
"<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_int_to_fp_<stype><q>")]
)
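;; For illustration (a sketch, not an extra pattern): for V4SF this
;; provides the standard names floatv4siv4sf2 and floatunsv4siv4sf2,
;; emitting "scvtf v0.4s, v1.4s" and "ucvtf v0.4s, v1.4s"
;; respectively; each integer lane is converted in place to the
;; equal-width floating-point lane.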
;; Conversions between vectors of floats and doubles.
;; Contains a mix of patterns to match standard pattern names
;; and those for intrinsics.
;; Float widening operations.
(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(float_extend:<VWIDE> (vec_select:<VHALF>
(match_operand:VQ_HSF 1 "register_operand" "w")
(match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
)))]
"TARGET_SIMD"
"fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
[(set_attr "type" "neon_fp_cvt_widen_s")]
)
;; Convert between fixed-point and floating-point (vector modes)
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
[(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
(unspec:<VHSDF:FCVT_TARGET>
[(match_operand:VHSDF 1 "register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")]
FCVT_F2FIXED))]
"TARGET_SIMD"
"<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
[(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
[(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
(unspec:<VDQ_HSDI:FCVT_TARGET>
[(match_operand:VDQ_HSDI 1 "register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")]
FCVT_FIXED2F))]
"TARGET_SIMD"
"<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
[(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)
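;; For example (assuming the usual arm_neon.h lowering): vcvt_n_s32_f32
;; (a, 8) matches the first pattern above and emits
;;   fcvtzs v0.2s, v1.2s, #8
;; scaling each lane by 2^8 and truncating to signed fixed-point, while
;; vcvt_n_f32_s32 (a, 8) matches the second and emits
;;   scvtf v0.2s, v1.2s, #8
;; dividing by 2^8 after the conversion.  The immediate gives the
;; number of fractional bits, at most the element width.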
;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
;; is inconsistent with vector ordering elsewhere in the compiler, in that
;; the meaning of HI and LO changes depending on the target endianness.
;; While elsewhere we map the higher numbered elements of a vector to
;; the lower architectural lanes of the vector, for these patterns we want
;; to always treat "hi" as referring to the higher architectural lanes.
;; Consequently, while the patterns below look inconsistent with our
;; other big-endian patterns their behavior is as required.
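;; Concretely: vec_unpacks_lo_v8hf always emits FCVTL (architectural
;; lanes 0-3) and vec_unpacks_hi_v8hf always emits FCVTL2 (lanes 4-7);
;; what varies on big-endian is the PARALLEL built by
;; aarch64_simd_vect_par_cnst_half in the expanders below, which
;; selects whichever GCC element numbers name those architectural
;; lanes.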
(define_expand "vec_unpacks_lo_<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:VQ_HSF 1 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
operands[1], p));
DONE;
}
)
(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(float_extend:<VWIDE> (vec_select:<VHALF>
(match_operand:VQ_HSF 1 "register_operand" "w")
(match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
)))]
"TARGET_SIMD"
"fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_cvt_widen_s")]
)
(define_expand "vec_unpacks_hi_<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:VQ_HSF 1 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
operands[1], p));
DONE;
}
)
(define_insn "aarch64_float_extend_lo_<Vwide>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(float_extend:<VWIDE>
(match_operand:VDF 1 "register_operand" "w")))]
"TARGET_SIMD"
"fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
[(set_attr "type" "neon_fp_cvt_widen_s")]
)
;; Float narrowing operations.
(define_insn "aarch64_float_trunc_rodd_df"
[(set (match_operand:SF 0 "register_operand" "=w")
(unspec:SF [(match_operand:DF 1 "register_operand" "w")]
UNSPEC_FCVTXN))]
"TARGET_SIMD"
"fcvtxn\\t%s0, %d1"
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
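;; FCVTXN narrows with "round to odd" and underlies the vcvtx*
;; intrinsics.  The usual motivation (background, not something these
;; patterns depend on) is that rounding to odd followed by a second,
;; correctly rounded narrowing (e.g. DF->SF->HF) avoids
;; double-rounding error.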
(define_insn "aarch64_float_trunc_rodd_lo_v2sf"
[(set (match_operand:V2SF 0 "register_operand" "=w")
(unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
UNSPEC_FCVTXN))]
"TARGET_SIMD"
"fcvtxn\\t%0.2s, %1.2d"
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
(define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(vec_concat:V4SF
(match_operand:V2SF 1 "register_operand" "0")
(unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
UNSPEC_FCVTXN)))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"fcvtxn2\\t%0.4s, %2.2d"
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
(define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(vec_concat:V4SF
(unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
UNSPEC_FCVTXN)
(match_operand:V2SF 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"fcvtxn2\\t%0.4s, %2.2d"
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
(define_expand "aarch64_float_trunc_rodd_hi_v4sf"
[(match_operand:V4SF 0 "register_operand")
(match_operand:V2SF 1 "register_operand")
(match_operand:V2DF 2 "register_operand")]
"TARGET_SIMD"
{
rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
? gen_aarch64_float_trunc_rodd_hi_v4sf_be
: gen_aarch64_float_trunc_rodd_hi_v4sf_le;
emit_insn (gen (operands[0], operands[1], operands[2]));
DONE;
}
)
(define_insn "aarch64_float_truncate_lo_<mode>"
[(set (match_operand:VDF 0 "register_operand" "=w")
(float_truncate:VDF
(match_operand:<VWIDE> 1 "register_operand" "w")))]
"TARGET_SIMD"
"fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
[(set (match_operand:<VDBL> 0 "register_operand" "=w")
(vec_concat:<VDBL>
(match_operand:VDF 1 "register_operand" "0")
(float_truncate:VDF
(match_operand:<VWIDE> 2 "register_operand" "w"))))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
[(set (match_operand:<VDBL> 0 "register_operand" "=w")
(vec_concat:<VDBL>
(float_truncate:VDF
(match_operand:<VWIDE> 2 "register_operand" "w"))
(match_operand:VDF 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
[(match_operand:<VDBL> 0 "register_operand")
(match_operand:VDF 1 "register_operand")
(match_operand:<VWIDE> 2 "register_operand")]
"TARGET_SIMD"
{
rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
? gen_aarch64_float_truncate_hi_<Vdbl>_be
: gen_aarch64_float_truncate_hi_<Vdbl>_le;
emit_insn (gen (operands[0], operands[1], operands[2]));
DONE;
}
)
(define_expand "vec_pack_trunc_v2df"
[(set (match_operand:V4SF 0 "register_operand")
(vec_concat:V4SF
(float_truncate:V2SF
(match_operand:V2DF 1 "register_operand"))
(float_truncate:V2SF
(match_operand:V2DF 2 "register_operand"))
))]
"TARGET_SIMD"
{
rtx tmp = gen_reg_rtx (V2SFmode);
int lo = BYTES_BIG_ENDIAN ? 2 : 1;
int hi = BYTES_BIG_ENDIAN ? 1 : 2;
emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
tmp, operands[hi]));
DONE;
}
)
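;; For instance, on little-endian the expansion above yields
;;   fcvtn  vT.2s, v1.2d
;;   fcvtn2 vT.4s, v2.2d
;; with operand 1 in the low architectural half; on big-endian the
;; operands are swapped so that GCC's element ordering is preserved.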
(define_expand "vec_pack_trunc_df"
[(set (match_operand:V2SF 0 "register_operand")
(vec_concat:V2SF
(float_truncate:SF (match_operand:DF 1 "general_operand"))
(float_truncate:SF (match_operand:DF 2 "general_operand"))))]
"TARGET_SIMD"
{
rtx tmp = gen_reg_rtx (V2SFmode);
emit_insn (gen_aarch64_vec_concatdf (tmp, operands[1], operands[2]));
emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
DONE;
}
)
;; FP Max/Min
;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
;; expression like:
;; a = (b < c) ? b : c;
;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
;; -fno-signed-zeros are enabled either explicitly or indirectly via
;; -ffast-math.
;;
;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
;; The 'smax' and 'smin' RTL standard pattern names do not specify which
;; operand will be returned when both operands are zero (i.e. they may not
;; honour signed zeroes), or when either operand is NaN. Therefore GCC
;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; NaNs.
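;; As an illustrative sketch: with -ffast-math, a loop computing
;;   a[i] = b[i] < c[i] ? b[i] : c[i];
;; is vectorized through MIN_EXPR and matches the smin variant below,
;; e.g. "fminnm v0.4s, v1.4s, v2.4s"; without those flags GCC must
;; preserve NaN and signed-zero semantics and does not use this
;; pattern.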
(define_insn "<su><maxmin><mode>3"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")))]
"TARGET_SIMD"
"f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
;; Vector forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
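;; FMAXNM/FMINNM implement IEEE 754-2008 maxNum/minNum: when exactly
;; one operand is a quiet NaN, the other operand is returned, matching
;; the fmax ()/fmin () library semantics.  For example (assuming the
;; usual intrinsic lowering) vmaxnm_f32 (a, b) maps here as
;;   fmaxnm v0.2s, v1.2s, v2.2s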
(define_insn "<fmaxmin><mode>3"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")]
FMAXMIN_UNS))]
"TARGET_SIMD"
"<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
;; 'across lanes' add.
(define_insn "aarch64_faddp<mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
(match_operand:VHSDF 2 "register_operand" "w")]
UNSPEC_FADDV))]
"TARGET_SIMD"
"faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
(define_insn "reduc_plus_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand" "=w")
(unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
UNSPEC_ADDV))]
"TARGET_SIMD"
"add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
[(set_attr "type" "neon_reduc_add<q>")]
)
(define_insn "reduc_plus_scal_v2si"
[(set (match_operand:SI 0 "register_operand" "=w")
(unspec:SI [(match_operand:V2SI 1 "register_operand" "w")]
UNSPEC_ADDV))]
"TARGET_SIMD"
"addp\\t%0.2s, %1.2s, %1.2s"
[(set_attr "type" "neon_reduc_add")]
)
;; ADDV with result zero-extended to SI/DImode (for popcount).
(define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
[(set (match_operand:GPI 0 "register_operand" "=w")
(zero_extend:GPI
(unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
UNSPEC_ADDV)))]
"TARGET_SIMD"
"add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
[(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
)
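;; E.g. (an illustrative sketch, not taken from these sources) a scalar
;; popcount such as
;;
;;   int f (unsigned long long x) { return __builtin_popcountll (x); }
;;
;; expands to a byte-wise CNT whose result is summed by the zero-extending
;; ADDV above, roughly:
;;
;;   fmov  d0, x0
;;   cnt   v0.8b, v0.8b
;;   addv  b0, v0.8b
;;   fmov  w0, s0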
(define_insn "reduc_plus_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand" "=w")
(unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
UNSPEC_FADDV))]
"TARGET_SIMD"
"faddp\\t%<Vetype>0, %1.<Vtype>"
[(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)
(define_expand "reduc_plus_scal_v4sf"
[(set (match_operand:SF 0 "register_operand")
(unspec:SF [(match_operand:V4SF 1 "register_operand")]
UNSPEC_FADDV))]
"TARGET_SIMD"
{
rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
rtx scratch = gen_reg_rtx (V4SFmode);
emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
DONE;
})
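;; The V4SF expander above reduces with two pairwise additions: the first
;; FADDP leaves {a+b, c+d, a+b, c+d} in the scratch register, the second
;; leaves the full sum a+b+c+d in every lane, and the endianness-corrected
;; lane 0 is then extracted.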
(define_insn "aarch64_<su>addlv<mode>"
[(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
(unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
USADDLV))]
"TARGET_SIMD"
"<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
[(set_attr "type" "neon_reduc_add<q>")]
)
(define_insn "aarch64_<su>addlp<mode>"
[(set (match_operand:<VDBLW> 0 "register_operand" "=w")
(unspec:<VDBLW> [(match_operand:VDQV_L 1 "register_operand" "w")]
USADDLP))]
"TARGET_SIMD"
"<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
[(set_attr "type" "neon_reduc_add<q>")]
)
(define_insn "clrsb<mode>2"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
"TARGET_SIMD"
"cls\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_cls<q>")]
)
(define_insn "clz<mode>2"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
"TARGET_SIMD"
"clz\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_cls<q>")]
)
(define_insn "popcount<mode>2"
[(set (match_operand:VB 0 "register_operand" "=w")
(popcount:VB (match_operand:VB 1 "register_operand" "w")))]
"TARGET_SIMD"
"cnt\\t%0.<Vbtype>, %1.<Vbtype>"
[(set_attr "type" "neon_cnt<q>")]
)
;; 'across lanes' max and min ops.
;; Template for outputting a scalar, so we can create __builtins which can be
;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
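;; E.g. (an illustrative sketch, not taken from these sources) with
;; -ffast-math a reduction loop such as
;;
;;   float m = a[0];
;;   for (int i = 1; i < 4; i++)
;;     m = m > a[i] ? m : a[i];
;;
;; can be recognized as IFN_REDUC_MAX and emitted as a single across-lanes
;; maximum, e.g.
;;
;;   fmaxnmv  s0, v0.4s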
(define_expand "reduc_<optab>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
(unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
FMAXMINV)]
"TARGET_SIMD"
{
rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
rtx scratch = gen_reg_rtx (<MODE>mode);
emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
operands[1]));
emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
DONE;
}
)
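;; The C fmax/fmin functions have IEEE maxNum/minNum semantics, which is
;; what the FMAXNM/FMINNM instructions implement, so the fmax/fmin
;; reductions below can simply reuse the smax/smin expander above.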
(define_expand "reduc_<fmaxmin>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
(unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
FMAXMINNMV)]
"TARGET_SIMD"
{
emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
DONE;
}
)
;; Likewise for integer cases, signed and unsigned.
(define_expand "reduc_<optab>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
MAXMINV)]
"TARGET_SIMD"
{
rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
rtx scratch = gen_reg_rtx (<MODE>mode);
emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
operands[1]));
emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
DONE;
}
)
(define_insn "aarch64_reduc_<optab>_internal<mode>"
[(set (match_operand:VDQV_S 0 "register_operand" "=w")
(unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
MAXMINV))]
"TARGET_SIMD"
"<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
[(set_attr "type" "neon_reduc_minmax<q>")]
)
(define_insn "aarch64_reduc_<optab>_internalv2si"
[(set (match_operand:V2SI 0 "register_operand" "=w")
(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
MAXMINV))]
"TARGET_SIMD"
"<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
[(set_attr "type" "neon_reduc_minmax")]
)
(define_insn "aarch64_reduc_<optab>_internal<mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
FMAXMINV))]
"TARGET_SIMD"
"<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
[(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
;; allocation.
;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
;; to select.
;;
;; Thus our BSL is of the form:
;; op0 = bsl (mask, op2, op3)
;; We can use any of:
;;
;;   if (op0 = mask)
;;     bsl  op0, op2, op3
;;   if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
;;     bit  op0, op2, mask
;;   if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
;;     bif  op0, op3, mask
;;
;; This pattern is generated by the aarch64_simd_bsl<mode> expander.
;; Some forms of straight-line code may generate the equivalent form
;; in *aarch64_simd_bsl<mode>_alt.
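;; The RTL below encodes the select purely with bitwise operations, via the
;; identity (a sketch of the equivalence; ~ is bitwise NOT):
;;
;;   bsl (mask, op2, op3) = (op2 & mask) | (op3 & ~mask)
;;                        = ((op2 ^ op3) & mask) ^ op3
;;
;; which is exactly the xor/and/xor tree matched by the pattern.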
(define_insn "aarch64_simd_bsl<mode>_internal"
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
(xor:VDQ_I
(and:VDQ_I
(xor:VDQ_I
(match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
(match_operand:VDQ_I 2 "register_operand" "w,w,0"))
(match_operand:VDQ_I 1 "register_operand" "0,w,w"))
(match_dup:<V_INT_EQUIV> 3)
))]
"TARGET_SIMD"
"@
bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
[(set_attr "type" "neon_bsl<q>")]
)
;; We need this form in addition to the above pattern to match the case
;; when combine tries merging three insns such that the second operand of
;; the outer XOR matches the second operand of the inner XOR rather than
;; the first. The two are equivalent but since recog doesn't try all
;; permutations of commutative operations, we have to have a separate pattern.
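;; Restating the two forms in C-style notation (an illustrative sketch,
;; with m, a and b standing for operands 1, 2 and 3):
;;
;;   internal form: ((b ^ a) & m) ^ b
;;   alt form:      ((b ^ a) & m) ^ a
;;
;; Both are bitwise selects; they differ only in which XOR operand is
;; reused by the outer XOR, hence the need for both patterns.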
(define_insn "*aarch64_simd_bsl<mode>_alt"
[(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
(xor:VDQ_I
(and:VDQ_I
(xor:VDQ_I
(match_operand:VDQ_I 3 "register_operand" "w,w,0")
(match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
(match_operand:VDQ_I 1 "register_operand" "0,w,w"))
(match_dup:<V_INT_EQUIV> 2)))]
"TARGET_SIMD"
"@
bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
[(set_attr "type" "neon_bsl<q>")]
)
;; DImode is special, we want to avoid computing operations which are
;; more naturally computed in general purpose registers in the vector
;; registers. If we do that, we need to move all three operands from general
;; purpose registers to vector registers, then back again. However, we
;; don't want to make this pattern an UNSPEC as we'd lose scope for
;; optimizations based on the component operations of a BSL.
;;
;; That means we need a splitter back to the individual operations, if they
;; would be better calculated on the integer side.
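;; As a C-level sketch of the split sequence emitted below (the variable
;; names are illustrative, not from the sources): with m = operands[1],
;; a = operands[2] and b = operands[3],
;;
;;   uint64_t t = a ^ b;   /* gen_xordi3  */
;;   t &= m;               /* gen_anddi3  */
;;   uint64_t r = t ^ b;   /* gen_xordi3  */
;;
;; which is equivalent to r = (a & m) | (b & ~m), i.e. a bitwise select
;; computed entirely on the general purpose side.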
(define_insn_and_split "aarch64_simd_bsldi_internal"
[(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
(xor:DI
(and:DI
(xor:DI
(match_operand:DI 3 "register_operand" "w,0,w,r")
(match_operand:DI 2 "register_operand" "w,w,0,r"))
(match_operand:DI 1 "register_operand" "0,w,w,r"))
(match_dup:DI 3)
))]
"TARGET_SIMD"
"@
bsl\\t%0.8b, %2.8b, %3.8b
bit\\t%0.8b, %2.8b, %1.8b
bif\\t%0.8b, %3.8b, %1.8b
#"
"&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
[(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
{
/* Split back to individual operations. If we're before reload, and
able to create a temporary register, do so. If we're after reload,
we've got an early-clobber destination register, so use that.
Otherwise, we can't create pseudos and we can't yet guarantee that
operands[0] is safe to write, so FAIL to split. */
rtx scratch;
if (reload_completed)
scratch = operands[0];
else if (can_create_pseudo_p ())
scratch = gen_reg_rtx (DImode);
else
FAIL;
emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
DONE;
}
[(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
(set_attr "length" "4,4,4,12")]
)
(define_insn_and_split "aarch64_simd_bsldi_alt"
[(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
(xor:DI
(and:DI
(xor:DI
(match_operand:DI 3 "register_operand" "w,w,0,r")
(match_operand:DI 2 "register_operand" "w,0,w,r"))
(match_operand:DI 1 "register_operand" "0,w,w,r"))
(match_dup:DI 2)
))]
"TARGET_SIMD"
"@
bsl\\t%0.8b, %3.8b, %2.8b
bit\\t%0.8b, %3.8b, %1.8b
bif\\t%0.8b, %2.8b, %1.8b
#"
"&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
[(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
{
/* Split back to individual operations. If we're before reload, and
able to create a temporary register, do so. If we're after reload,
we've got an early-clobber destination register, so use that.
Otherwise, we can't create pseudos and we can't yet guarantee that
operands[0] is safe to write, so FAIL to split. */
rtx scratch;
if (reload_completed)
scratch = operands[0];
else if (can_create_pseudo_p ())
scratch = gen_reg_rtx (DImode);
else
FAIL;
emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
DONE;
}
[(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
(set_attr "length" "4,4,4,12")]
)
(define_expand "aarch64_simd_bsl<mode>"
[(match_operand:VALLDIF 0 "register_operand")
(match_operand:<V_INT_EQUIV> 1 "register_operand")
(match_operand:VALLDIF 2 "register_operand")
(match_operand:VALLDIF 3 "register_operand")]
"TARGET_SIMD"
{
/* We can't alias operands together if they have different modes. */
rtx tmp = operands[0];
if (FLOAT_MODE_P (<MODE>mode))
{
operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
}
operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
operands[1],
operands[2],
operands[3]));
if (tmp != operands[0])
emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
DONE;
})
(define_expand "vcond_mask_<mode><v_int_equiv>"
[(match_operand:VALLDI 0 "register_operand")
(match_operand:VALLDI 1 "nonmemory_operand")
(match_operand:VALLDI 2 "nonmemory_operand")
(match_operand:<V_INT_EQUIV> 3 "register_operand")]
"TARGET_SIMD"
{
/* If we have (a = (P) ? -1 : 0), then we can simply move
the generated mask (the result must be an integer vector).  */
if (operands[1] == CONSTM1_RTX (<MODE>mode)
&& operands[2] == CONST0_RTX (<MODE>mode))
emit_move_insn (operands[0], operands[3]);
/* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
else if (operands[1] == CONST0_RTX (<MODE>mode)
&& operands[2] == CONSTM1_RTX (<MODE>mode))
emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
else
{
if (!REG_P (operands[1]))
operands[1] = force_reg (<MODE>mode, operands[1]);
if (!REG_P (operands[2]))
operands[2] = force_reg (<MODE>mode, operands[2]);
emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
operands[1], operands[2]));
}
DONE;
})
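;; A C-level summary of the three cases handled above (illustrative only):
;;
;;   r = p ? -1 : 0;  ->  r = mask;             /* plain move       */
;;   r = p ?  0 : -1; ->  r = ~mask;            /* one's complement */
;;   r = p ?  x : y;  ->  r = bsl (mask, x, y); /* general case     */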
;; Patterns comparing two vectors to produce a mask.
(define_expand "vec_cmp<mode><mode>"
[(set (match_operand:VSDQ_I_DI 0 "register_operand")
(match_operator 1 "comparison_operator"
[(match_operand:VSDQ_I_DI 2 "register_operand")
(match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
"TARGET_SIMD"
{
rtx mask = operands[0];
enum rtx_code code = GET_CODE (operands[1]);
switch (code)
{
case NE:
case LE:
case LT:
case GE:
case GT:
case EQ:
if (operands[3] == CONST0_RTX (<MODE>mode))
break;
/* Fall through. */
default:
if (!REG_P (operands[3]))
operands[3] = force_reg (<MODE>mode, operands[3]);
break;
}
switch (code)
{
case LT:
emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
break;
case GE:
emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
break;
case LE:
emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
break;
case GT:
emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
break;
case LTU:
emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
break;
case GEU:
emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
break;
case LEU:
emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
break;
case GTU:
emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
break;
case NE:
/* Handle NE as !EQ. */
emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
break;
case EQ:
emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
break;
default:
gcc_unreachable ();
}
DONE;
})
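;; Note on the unsigned cases above: the expander synthesizes the
;; "less than" forms by swapping the operands of the "greater than"
;; ones.  An illustrative sketch:
;;
;;   a LTU b  ->  cmgtu (b, a)
;;   a LEU b  ->  cmgeu (b, a)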
(define_expand "vec_cmp<mode><v_int_equiv>"
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
(match_operator 1 "comparison_operator"
[(match_operand:VDQF 2 "register_operand")
(match_operand:VDQF 3 "nonmemory_operand")]))]
"TARGET_SIMD"
{
int use_zero_form = 0;
enum rtx_code code = GET_CODE (operands[1]);
rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
rtx (*comparison) (rtx, rtx, rtx) = NULL;
switch (code)
{
case LE:
case LT:
case GE:
case GT:
case EQ:
if (operands[3] == CONST0_RTX (<MODE>mode))
{
use_zero_form = 1;
break;
}
/* Fall through. */
default:
if (!REG_P (operands[3]))
operands[3] = force_reg (<MODE>mode, operands[3]);
break;
}
switch (code)
{
case LT:
if (use_zero_form)
{
comparison = gen_aarch64_cmlt<mode>;
break;
}
/* Fall through. */
case UNLT:
std::swap (operands[2], operands[3]);
/* Fall through. */
case UNGT:
case GT:
comparison = gen_aarch64_cmgt<mode>;
break;
case LE:
if (use_zero_form)
{
comparison = gen_aarch64_cmle<mode>;
break;
}
/* Fall through. */
case UNLE:
std::swap (operands[2], operands[3]);
/* Fall through. */
case UNGE:
case GE:
comparison = gen_aarch64_cmge<mode>;
break;
case NE:
case EQ:
comparison = gen_aarch64_cmeq<mode>;
break;
case UNEQ:
case ORDERED:
case UNORDERED:
case LTGT:
break;
default:
gcc_unreachable ();
}
switch (code)
{
case UNGE:
case UNGT:
case UNLE:
case UNLT:
{
/* All of the above must not raise any FP exceptions. Thus we first
check each operand for NaNs and force any elements containing NaN to
zero before using them in the compare.
Example: UN<cc> (a, b) -> UNORDERED (a, b) |
(cm<cc> (isnan (a) ? 0.0 : a,
isnan (b) ? 0.0 : b))
We use the following transformations for doing the comparisons:
a UNGE b -> a GE b
a UNGT b -> a GT b
a UNLE b -> b GE a
a UNLT b -> b GT a. */
rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
lowpart_subreg (<V_INT_EQUIV>mode,
operands[2],
<MODE>mode)));
emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
lowpart_subreg (<V_INT_EQUIV>mode,
operands[3],
<MODE>mode)));
gcc_assert (comparison != NULL);
emit_insn (comparison (operands[0],
lowpart_subreg (<MODE>mode,
tmp0, <V_INT_EQUIV>mode),
lowpart_subreg (<MODE>mode,
tmp1, <V_INT_EQUIV>mode)));
emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
}
break;
case LT:
case LE:
case GT:
case GE:
case EQ:
case NE:
/* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
Since a LT b <=> b GE a and a LE b <=> b GT a, our transformations are:
a GE b -> a GE b
a GT b -> a GT b
a LE b -> b GE a
a LT b -> b GT a
a EQ b -> a EQ b
a NE b -> ~(a EQ b) */
gcc_assert (comparison != NULL);
emit_insn (comparison (operands[0], operands[2], operands[3]));
if (code == NE)
emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
break;
case LTGT:
/* LTGT is not guaranteed not to raise an FP exception.  So let's
go the faster way: ((a > b) || (b > a)).  */
emit_insn (gen_aarch64_cmgt<mode> (operands[0],
operands[2], operands[3]));
emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
break;
case ORDERED:
case UNORDERED:
case UNEQ:
/* cmeq (a, a) & cmeq (b, b). */
emit_insn (gen_aarch64_cmeq<mode> (operands[0],
operands[2], operands[2]));
emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
if (code == UNORDERED)
emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
else if (code == UNEQ)
{
emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
}
break;
default:
gcc_unreachable ();
}
DONE;
})
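;; An illustrative composition of the (UN)ORDERED/UNEQ cases above,
;; in C-style notation:
;;
;;   ordered   = (a == a) & (b == b);   /* x == x fails only for NaN */
;;   unordered = ~ordered;
;;   uneq      = ~ordered | (a == b);   /* via ORN                   */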
(define_expand "vec_cmpu<mode><mode>"
[(set (match_operand:VSDQ_I_DI 0 "register_operand")
(match_operator 1 "comparison_operator"
[(match_operand:VSDQ_I_DI 2 "register_operand")
(match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
"TARGET_SIMD"
{
emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
operands[2], operands[3]));
DONE;
})
(define_expand "vcond<mode><mode>"
[(set (match_operand:VALLDI 0 "register_operand")
(if_then_else:VALLDI
(match_operator 3 "comparison_operator"
[(match_operand:VALLDI 4 "register_operand")
(match_operand:VALLDI 5 "nonmemory_operand")])
(match_operand:VALLDI 1 "nonmemory_operand")
(match_operand:VALLDI 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
enum rtx_code code = GET_CODE (operands[3]);
/* NE is handled as !EQ in vec_cmp patterns, so we can explicitly invert
it and also switch operands 1/2 in order to avoid the additional
NOT instruction.  */
if (code == NE)
{
operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
operands[4], operands[5]);
std::swap (operands[1], operands[2]);
}
emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
operands[4], operands[5]));
emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
operands[2], mask));
DONE;
})
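;; An illustrative sketch of the NE inversion performed above:
;;
;;   vcond (a != b, x, y)  ->  vcond (a == b, y, x)
;;
;; which avoids the extra NOT that expanding NE as !EQ would need.  The
;; vcond/vcondu expanders below use the same trick.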
(define_expand "vcond<v_cmp_mixed><mode>"
[(set (match_operand:<V_cmp_mixed> 0 "register_operand")
(if_then_else:<V_cmp_mixed>
(match_operator 3 "comparison_operator"
[(match_operand:VDQF_COND 4 "register_operand")
(match_operand:VDQF_COND 5 "nonmemory_operand")])
(match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
(match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
enum rtx_code code = GET_CODE (operands[3]);
/* NE is handled as !EQ in vec_cmp patterns, so we can explicitly invert
it and also switch operands 1/2 in order to avoid the additional
NOT instruction.  */
if (code == NE)
{
operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
operands[4], operands[5]);
std::swap (operands[1], operands[2]);
}
emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
operands[4], operands[5]));
emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
operands[0], operands[1],
operands[2], mask));
DONE;
})
(define_expand "vcondu<mode><mode>"
[(set (match_operand:VSDQ_I_DI 0 "register_operand")
(if_then_else:VSDQ_I_DI
(match_operator 3 "comparison_operator"
[(match_operand:VSDQ_I_DI 4 "register_operand")
(match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
(match_operand:VSDQ_I_DI 1 "nonmemory_operand")
(match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
rtx mask = gen_reg_rtx (<MODE>mode);
enum rtx_code code = GET_CODE (operands[3]);
/* NE is handled as !EQ in vec_cmp patterns, so we can explicitly invert
it and also switch operands 1/2 in order to avoid the additional
NOT instruction.  */
if (code == NE)
{
operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
operands[4], operands[5]);
std::swap (operands[1], operands[2]);
}
emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
operands[4], operands[5]));
emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
operands[2], mask));
DONE;
})
(define_expand "vcondu<mode><v_cmp_mixed>"
[(set (match_operand:VDQF 0 "register_operand")
(if_then_else:VDQF
(match_operator 3 "comparison_operator"
[(match_operand:<V_cmp_mixed> 4 "register_operand")
(match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
(match_operand:VDQF 1 "nonmemory_operand")
(match_operand:VDQF 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
enum rtx_code code = GET_CODE (operands[3]);
/* NE is handled as !EQ in vec_cmp patterns, so we can explicitly invert
it and also switch operands 1/2 in order to avoid the additional
NOT instruction.  */
if (code == NE)
{
operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
operands[4], operands[5]);
std::swap (operands[1], operands[2]);
}
emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
mask, operands[3],
operands[4], operands[5]));
emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
operands[2], mask));
DONE;
})
;; Patterns for AArch64 SIMD Intrinsics.
;; Lane extraction with sign extension to general purpose register.
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
[(set (match_operand:GPI 0 "register_operand" "=r")
(sign_extend:GPI
(vec_select:<VDQQH:VEL>
(match_operand:VDQQH 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
INTVAL (operands[2]));
return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
}
[(set_attr "type" "neon_to_gp<VDQQH:q>")]
)
(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
[(set (match_operand:GPI 0 "register_operand" "=r")
(zero_extend:GPI
(vec_select:<VDQQH:VEL>
(match_operand:VDQQH 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
INTVAL (operands[2]));
return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
}
[(set_attr "type" "neon_to_gp<VDQQH:q>")]
)
;; Lane extraction of a value; neither sign nor zero extension
;; is guaranteed, so the upper bits should be considered undefined.
;; RTL uses GCC vector extension indices throughout, so flip only for assembly.
;; Extracting lane zero is split into a simple move when the value moves
;; between SIMD registers or is stored to memory.
(define_insn_and_split "aarch64_get_lane<mode>"
[(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
(vec_select:<VEL>
(match_operand:VALL_F16 1 "register_operand" "w, w, w")
(parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
"TARGET_SIMD"
{
operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
switch (which_alternative)
{
case 0:
return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
case 1:
return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
case 2:
return "st1\\t{%1.<Vetype>}[%2], %0";
default:
gcc_unreachable ();
}
}
"&& reload_completed
&& ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
[(set (match_dup 0) (match_dup 1))]
{
operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
}
[(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
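;; As an illustrative sketch (assumed example, not from the testsuite),
;; a lane read such as
;;   #include <arm_neon.h>
;;   int32_t get1 (int32x4_t x) { return vgetq_lane_s32 (x, 1); }
;; can match this pattern and emit "umov w0, v0.s[1]" for a GPR result;
;; reading lane 0 instead splits into a plain register move after reload.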
(define_insn "load_pair_lanes<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w")
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "memory_operand" "Utq")
(match_operand:VDCSIF 2 "memory_operand" "m")))]
"TARGET_SIMD
&& aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
"ldr\\t%<single_dtype>0, %1"
[(set_attr "type" "neon_load1_1reg<dblq>")]
)
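;; A minimal sketch of the case this pattern catches (assumed example,
;; little-endian):
;;   #include <arm_neon.h>
;;   int64x2_t load2 (int64_t *p)
;;   {
;;     return vcombine_s64 (vld1_s64 (p), vld1_s64 (p + 1));
;;   }
;; The two adjacent 64-bit loads merge into a single "ldr q0, [x0]".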
;; This STP pattern is a partial duplicate of the general vec_concat patterns
;; below. The reason for having both of them is that the alternatives of
;; the later patterns do not have consistent register preferences: the STP
;; alternatives have no preference between GPRs and FPRs (and if anything,
;; the GPR form is more natural for scalar integers) whereas the other
;; alternatives *require* an FPR for operand 1 and prefer one for operand 2.
;;
;; Using "*" to hide the STP alternatives from the RA penalizes cases in
;; which the destination was always memory. On the other hand, expressing
;; the true preferences makes GPRs seem more palatable than they really are
;; for register destinations.
;;
;; Despite that, we do still want the general form to have STP alternatives,
;; in order to handle cases where a register destination is spilled.
;;
;; The best compromise therefore seemed to be to have a dedicated STP
;; pattern to catch cases in which the destination was always memory.
;; This dedicated pattern must come first.
(define_insn "store_pair_lanes<mode>"
[(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "register_operand" "w, r")
(match_operand:VDCSIF 2 "register_operand" "w, r")))]
"TARGET_SIMD"
"@
stp\t%<single_type>1, %<single_type>2, %y0
stp\t%<single_wx>1, %<single_wx>2, %y0"
[(set_attr "type" "neon_stp, store_16")]
)
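;; A hedged sketch of a memory-only destination (assumed example):
;;   #include <arm_neon.h>
;;   void store2 (int64_t *p, int64_t a, int64_t b)
;;   {
;;     vst1q_s64 (p, vcombine_s64 (vdup_n_s64 (a), vdup_n_s64 (b)));
;;   }
;; may be emitted as "stp x1, x2, [x0]" via the GPR alternative.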
;; Form a vector whose least significant half comes from operand 1 and whose
;; most significant half comes from operand 2. The register alternatives
;; tie the least significant half to the same register as the destination,
;; so that only the other half needs to be handled explicitly. For the
;; reasons given above, the STP alternatives use ? for constraints that
;; the register alternatives either don't accept or themselves disparage.
(define_insn "*aarch64_combine_internal<mode>"
[(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")
(match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, w, ?r")))]
"TARGET_SIMD
&& !BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <VDBL>mode)
|| register_operand (operands[2], <MODE>mode))"
"@
ins\t%0.<single_type>[1], %2.<single_type>[0]
ins\t%0.<single_type>[1], %<single_wx>2
ld1\t{%0.<single_type>}[1], %2
stp\t%<single_type>1, %<single_type>2, %y0
stp\t%<single_wx>1, %<single_wx>2, %y0"
[(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
)
(define_insn "*aarch64_combine_internal_be<mode>"
[(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
(vec_concat:<VDBL>
(match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, ?w, ?r")
(match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")))]
"TARGET_SIMD
&& BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <VDBL>mode)
|| register_operand (operands[2], <MODE>mode))"
"@
ins\t%0.<single_type>[1], %2.<single_type>[0]
ins\t%0.<single_type>[1], %<single_wx>2
ld1\t{%0.<single_type>}[1], %2
stp\t%<single_type>2, %<single_type>1, %y0
stp\t%<single_wx>2, %<single_wx>1, %y0"
[(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
)
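;; For instance (an assumed sketch), filling in the high half of a vector
;; whose low half is already in the destination register:
;;   #include <arm_neon.h>
;;   int32x4_t set_hi (int32x2_t lo, int32_t *p)
;;   {
;;     return vcombine_s32 (lo, vld1_s32 (p));
;;   }
;; may use the ld1-to-lane alternative, "ld1 {v0.d}[1], [x0]", leaving
;; the tied low half untouched.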
;; In this insn, operand 1 should be the low part and operand 2 the high
;; part of the destination vector.
(define_insn "*aarch64_combinez<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")
(match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"@
fmov\\t%<single_type>0, %<single_type>1
fmov\t%<single_type>0, %<single_wx>1
ldr\\t%<single_type>0, %1"
[(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
(set_attr "arch" "simd,fp,simd")]
)
(define_insn "*aarch64_combinez_be<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
(vec_concat:<VDBL>
(match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
(match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"@
fmov\\t%<single_type>0, %<single_type>1
fmov\t%<single_type>0, %<single_wx>1
ldr\\t%<single_type>0, %1"
[(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
(set_attr "arch" "simd,fp,simd")]
)
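;; A sketch of the zero-high-half case (assumed example, little-endian):
;;   #include <arm_neon.h>
;;   int64x2_t load_low (int64_t *p)
;;   {
;;     return vcombine_s64 (vld1_s64 (p), vdup_n_s64 (0));
;;   }
;; A single "ldr d0, [x0]" suffices, because writing a D register
;; implicitly zeroes the upper 64 bits of the corresponding Q register.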
;; Form a vector whose first half (in array order) comes from operand 1
;; and whose second half (in array order) comes from operand 2.
;; This operand order follows the RTL vec_concat operation.
(define_expand "@aarch64_vec_concat<mode>"
[(set (match_operand:<VDBL> 0 "register_operand")
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "general_operand")
(match_operand:VDCSIF 2 "general_operand")))]
"TARGET_SIMD"
{
int lo = BYTES_BIG_ENDIAN ? 2 : 1;
int hi = BYTES_BIG_ENDIAN ? 1 : 2;
if (MEM_P (operands[1])
&& MEM_P (operands[2])
&& aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2]))
/* Use load_pair_lanes<mode>. */
;
else if (operands[hi] == CONST0_RTX (<MODE>mode))
{
/* Use *aarch64_combinez<mode>. */
if (!nonimmediate_operand (operands[lo], <MODE>mode))
operands[lo] = force_reg (<MODE>mode, operands[lo]);
}
else
{
      /* Use *aarch64_combine_internal<mode>.  */
operands[lo] = force_reg (<MODE>mode, operands[lo]);
if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
{
if (MEM_P (operands[hi]))
{
rtx addr = force_reg (Pmode, XEXP (operands[hi], 0));
operands[hi] = replace_equiv_address (operands[hi], addr);
}
else
operands[hi] = force_reg (<MODE>mode, operands[hi]);
}
}
})
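;; To illustrate the dispatch above (a sketch): on little-endian targets
;; lo/hi name operands 1/2, so a concatenation with a zero high part, e.g.
;;   (vec_concat:V2DI (reg:DI) (const_int 0))
;; takes the *aarch64_combinez<mode> branch; on big-endian the lo/hi
;; indices swap, since lane significance within the double-width mode is
;; reversed.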
;; Form a vector whose least significant half comes from operand 1 and whose
;; most significant half comes from operand 2. This operand order follows
;; arm_neon.h vcombine* intrinsics.
(define_expand "aarch64_combine<mode>"
[(match_operand:<VDBL> 0 "register_operand")
(match_operand:VDC 1 "general_operand")
(match_operand:VDC 2 "general_operand")]
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
std::swap (operands[1], operands[2]);
emit_insn (gen_aarch64_vec_concat<mode> (operands[0], operands[1],
operands[2]));
DONE;
}
)
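;; A hedged usage sketch: this expander backs the arm_neon.h vcombine_*
;; intrinsics, e.g.
;;   #include <arm_neon.h>
;;   uint8x16_t cat (uint8x8_t a, uint8x8_t b) { return vcombine_u8 (a, b); }
;; Operand 1 supplies the least significant half, so on big-endian the
;; operands are swapped first to match the array order that vec_concat
;; expects.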
;; <su><addsub>l<q>.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 1 "register_operand" "w")
(match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 2 "register_operand" "w")
(match_dup 3)))))]
"TARGET_SIMD"
"<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 1 "register_operand" "w")
(match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
(match_operand:VQW 2 "register_operand" "w")
(match_dup 3)))))]
"TARGET_SIMD"
"<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
[(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
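;; An assumed mapping sketch for these widening forms:
;;   #include <arm_neon.h>
;;   int32x4_t wadd_lo (int16x8_t a, int16x8_t b)
;;   { return vaddl_s16 (vget_low_s16 (a), vget_low_s16 (b)); }
;;   int32x4_t wadd_hi (int16x8_t a, int16x8_t b)
;;   { return vaddl_high_s16 (a, b); }
;; may assemble to "saddl v0.4s, v0.4h, v1.4h" and
;; "saddl2 v0.4s, v0.8h, v1.8h" respectively.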
(define_expand "vec_widen_<su>addl_lo_<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
(ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "vec_widen_<su>addl_hi_<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
(ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "vec_widen_<su>subl_lo_<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
(ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "vec_widen_<su>subl_hi_<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
(ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
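;; These standard pattern names let the vectorizer emit widening adds and
;; subtracts directly. A hedged example (assumed, with -O3):
;;   void f (int *restrict r, short *restrict a, short *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       r[i] = a[i] + b[i];
;;   }
;; may vectorize using saddl/saddl2 on the low and high halves of each
;; 128-bit input vector.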
(define_expand "aarch64_saddl2<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:VQW 1 "register_operand")
(match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "aarch64_uaddl2<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:VQW 1 "register_operand")
(match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "aarch64_ssubl2<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:VQW 1 "register_operand")
(match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "aarch64_usubl2<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:VQW 1 "register_operand")
(match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
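
;; The <su>addl2/<su>subl2 expanders above all follow one scheme: build a
;; PARALLEL selecting the high <nunits>/2 lanes of each 128-bit input and
;; hand it to the corresponding _hi_internal pattern, which matches the
;; SADDL2, UADDL2, SSUBL2 and USUBL2 instructions; these underlie the
;; vaddl_high_*/vsubl_high_* intrinsics in arm_neon.h.

;; <su><addsub>l widens both 64-bit inputs in full, e.g. for V8QI inputs
;; saddl v0.8h, v1.8b, v2.8b.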
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
(match_operand:VD_BHSI 1 "register_operand" "w"))
(ANY_EXTEND:<VWIDE>
(match_operand:VD_BHSI 2 "register_operand" "w"))))]
"TARGET_SIMD"
"<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
;; <su><addsub>w<q>.
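
;; The widen_<su>sum<mode>3 expanders add every element of a vector into an
;; accumulator of double the element width.  No single instruction does this
;; for a 128-bit input, so the first expander of each pair emits <su>addw on
;; the low half of operand 1 and then <su>addw2 on the high half; for 64-bit
;; inputs a single <su>addw suffices.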
(define_expand "widen_ssum<mode>3"
[(set (match_operand:<VDBLW> 0 "register_operand")
(plus:<VDBLW> (sign_extend:<VDBLW>
(match_operand:VQW 1 "register_operand"))
(match_operand:<VDBLW> 2 "register_operand")))]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
operands[1], p));
emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
DONE;
}
)
(define_expand "widen_ssum<mode>3"
[(set (match_operand:<VWIDE> 0 "register_operand")
(plus:<VWIDE> (sign_extend:<VWIDE>
(match_operand:VD_BHSI 1 "register_operand"))
(match_operand:<VWIDE> 2 "register_operand")))]
"TARGET_SIMD"
{
emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
DONE;
})
(define_expand "widen_usum<mode>3"
[(set (match_operand:<VDBLW> 0 "register_operand")
(plus:<VDBLW> (zero_extend:<VDBLW>
(match_operand:VQW 1 "register_operand"))
(match_operand:<VDBLW> 2 "register_operand")))]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
operands[1], p));
emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
DONE;
}
)
(define_expand "widen_usum<mode>3"
[(set (match_operand:<VWIDE> 0 "register_operand")
(plus:<VWIDE> (zero_extend:<VWIDE>
(match_operand:VD_BHSI 1 "register_operand"))
(match_operand:<VWIDE> 2 "register_operand")))]
"TARGET_SIMD"
{
emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
DONE;
})
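
;; <su>subw subtracts a widened narrow vector from a wide accumulator, e.g.
;; ssubw v0.8h, v1.8h, v2.8b.  The _internal and 2_internal variants below
;; take an explicit lane-select PARALLEL, letting the expanders pick the low
;; or high half of a 128-bit source.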
(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
(ANY_EXTEND:<VWIDE>
(match_operand:VD_BHSI 2 "register_operand" "w"))))]
"TARGET_SIMD"
"<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
[(set_attr "type" "neon_sub_widen")]
)
(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
(ANY_EXTEND:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQW 2 "register_operand" "w")
(match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
"TARGET_SIMD"
"<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
[(set_attr "type" "neon_sub_widen")]
)
(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
(ANY_EXTEND:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQW 2 "register_operand" "w")
(match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
"TARGET_SIMD"
"<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
[(set_attr "type" "neon_sub_widen")]
)
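
;; <su>addw likewise adds a widened narrow vector to a wide accumulator,
;; e.g. uaddw v0.8h, v1.8h, v2.8b; since plus is commutative, the RTL
;; places the extended operand first.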
(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
(ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
(match_operand:<VWIDE> 1 "register_operand" "w")))]
"TARGET_SIMD"
"<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
[(set_attr "type" "neon_add_widen")]
)
(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
(ANY_EXTEND:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQW 2 "register_operand" "w")
(match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
(match_operand:<VWIDE> 1 "register_operand" "w")))]
"TARGET_SIMD"
"<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
[(set_attr "type" "neon_add_widen")]
)
(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
(ANY_EXTEND:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQW 2 "register_operand" "w")
(match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
(match_operand:<VWIDE> 1 "register_operand" "w")))]
"TARGET_SIMD"
"<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
[(set_attr "type" "neon_add_widen")]
)
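
;; The <su>addw2 expanders mirror the <su>addl2 ones: select the high half
;; of 128-bit operand 2 and forward it to the matching _internal pattern.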
(define_expand "aarch64_saddw2<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:<VWIDE> 1 "register_operand")
(match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "aarch64_uaddw2<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:<VWIDE> 1 "register_operand")
(match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "aarch64_ssubw2<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:<VWIDE> 1 "register_operand")
(match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
(define_expand "aarch64_usubw2<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:<VWIDE> 1 "register_operand")
(match_operand:VQW 2 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
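
;; The four *w2 expanders above handle the high-half forms of the
;; widening add/subtract: aarch64_simd_vect_par_cnst_half builds the
;; PARALLEL that the corresponding _internal pattern uses in a
;; vec_select to pick the top half of operand 2.  As a rough scalar
;; model (a sketch, not the formal RTL semantics), for 2*N narrow
;; input elements:
;;
;;   for (i = 0; i < N; i++)
;;     dst[i] = op1[i] OP (wide) op2[i + N];   /* OP is + or - */
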
;; <su><r>h<addsub>.
(define_expand "<u>avg<mode>3_floor"
[(set (match_operand:VDQ_BHSI 0 "register_operand")
(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
(match_operand:VDQ_BHSI 2 "register_operand")]
HADD))]
"TARGET_SIMD"
)
(define_expand "<u>avg<mode>3_ceil"
[(set (match_operand:VDQ_BHSI 0 "register_operand")
(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
(match_operand:VDQ_BHSI 2 "register_operand")]
RHADD))]
"TARGET_SIMD"
)
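
;; <u>avg<mode>3_floor and <u>avg<mode>3_ceil are the midend's names
;; for the halving adds below: _floor maps to [SU]HADD and _ceil to
;; [SU]RHADD.  Both compute the sum to full precision before halving,
;; so the intermediate add cannot overflow; per element (sketch):
;;
;;   dst[i] = (a[i] + b[i]) >> 1;       /* _floor, [su]hadd  */
;;   dst[i] = (a[i] + b[i] + 1) >> 1;   /* _ceil,  [su]rhadd */
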
(define_insn "aarch64_<sur>h<addsub><mode>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
(match_operand:VDQ_BHSI 2 "register_operand" "w")]
HADDSUB))]
"TARGET_SIMD"
"<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_<addsub>_halve<q>")]
)
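
;; The insn above also covers the halving subtracts, which have no
;; rounding variant; per element, SHSUB/UHSUB compute the difference
;; to full precision and then shift: dst[i] = (a[i] - b[i]) >> 1.
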
;; <r><addsub>hn<q>.
(define_insn "aarch64_<sur><addsub>hn<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
(match_operand:VQN 2 "register_operand" "w")]
ADDSUBHN)
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
(define_insn "aarch64_<sur><addsub>hn<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
(match_operand:VQN 2 "register_operand" "w")]
ADDSUBHN)))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
(define_expand "aarch64_<sur><addsub>hn<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand")
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
(match_operand:VQN 2 "register_operand")]
ADDSUBHN))]
"TARGET_SIMD"
{
rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_be (tmp, operands[1],
operands[2], CONST0_RTX (<VNARROWQ>mode)));
else
emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_le (tmp, operands[1],
operands[2], CONST0_RTX (<VNARROWQ>mode)));
/* The intrinsic expects a narrow result, so emit a subreg that will get
optimized away as appropriate. */
emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
<VNARROWQ2>mode));
DONE;
}
)
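
;; Per element, the narrowing forms compute the wide sum or difference
;; and keep its top half: with wide elements of W bits (a sketch, not
;; the formal semantics),
;;
;;   dst[i] = (narrow) ((a[i] OP b[i] + round) >> (W/2));
;;
;; where round is 1 << (W/2 - 1) for the rounding (r) forms and 0
;; otherwise.  The vec_concat with zero in the insn patterns models a
;; 64-bit AdvSIMD write clearing the other half of the Q register; the
;; _le/_be variants differ only in which half vec_concat places first.
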
(define_insn "aarch64_<sur><addsub>hn2<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 1 "register_operand" "0")
(unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
(match_operand:VQN 3 "register_operand" "w")]
ADDSUBHN)))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
[(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
(define_insn "aarch64_<sur><addsub>hn2<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
(match_operand:VQN 3 "register_operand" "w")]
ADDSUBHN)
(match_operand:<VNARROWQ> 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
[(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
(define_expand "aarch64_<sur><addsub>hn2<mode>"
[(match_operand:<VNARROWQ2> 0 "register_operand")
(match_operand:<VNARROWQ> 1 "register_operand")
(unspec [(match_operand:VQN 2 "register_operand")
(match_operand:VQN 3 "register_operand")]
ADDSUBHN)]
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_<sur><addsub>hn2<mode>_insn_be (operands[0],
operands[1], operands[2], operands[3]));
else
emit_insn (gen_aarch64_<sur><addsub>hn2<mode>_insn_le (operands[0],
operands[1], operands[2], operands[3]));
DONE;
}
)
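
;; The hn2 forms narrow into the high half of the destination while
;; preserving the low half, which is passed in as operand 1 and tied
;; to the output by the "0" constraint; again the LE and BE variants
;; differ only in the vec_concat order.
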
;; pmul.
(define_insn "aarch64_pmul<mode>"
[(set (match_operand:VB 0 "register_operand" "=w")
(unspec:VB [(match_operand:VB 1 "register_operand" "w")
(match_operand:VB 2 "register_operand" "w")]
UNSPEC_PMUL))]
"TARGET_SIMD"
"pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_mul_<Vetype><q>")]
)
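
;; PMUL multiplies each pair of bytes as polynomials over GF(2) and
;; keeps the low 8 bits of the 15-bit product, i.e. a carry-less
;; multiply.  One lane, as a sketch:
;;
;;   uint8_t res = 0;
;;   for (i = 0; i < 8; i++)
;;     if (a & (1 << i))
;;       res ^= (uint8_t) (b << i);
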
(define_insn "aarch64_pmullv8qi"
[(set (match_operand:V8HI 0 "register_operand" "=w")
(unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
(match_operand:V8QI 2 "register_operand" "w")]
UNSPEC_PMULL))]
"TARGET_SIMD"
"pmull\\t%0.8h, %1.8b, %2.8b"
[(set_attr "type" "neon_mul_b_long")]
)
(define_insn "aarch64_pmull_hiv16qi_insn"
[(set (match_operand:V8HI 0 "register_operand" "=w")
(unspec:V8HI
[(vec_select:V8QI
(match_operand:V16QI 1 "register_operand" "w")
(match_operand:V16QI 3 "vect_par_cnst_hi_half" ""))
(vec_select:V8QI
(match_operand:V16QI 2 "register_operand" "w")
(match_dup 3))]
UNSPEC_PMULL))]
"TARGET_SIMD"
"pmull2\\t%0.8h, %1.16b, %2.16b"
[(set_attr "type" "neon_mul_b_long")]
)
(define_expand "aarch64_pmull_hiv16qi"
[(match_operand:V8HI 0 "register_operand")
(match_operand:V16QI 1 "register_operand")
(match_operand:V16QI 2 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (V16QImode, 16, true);
emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[0], operands[1],
operands[2], p));
DONE;
}
)
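
;; PMULL and PMULL2 are the widening forms: each byte pair yields its
;; full 16-bit carry-less product, PMULL from the low eight bytes and
;; PMULL2 from the high eight, selected here with the same
;; vect_par_cnst_half mechanism as the other high-half patterns.
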
;; fmulx.
(define_insn "aarch64_fmulx<mode>"
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
(unspec:VHSDF_HSDF
[(match_operand:VHSDF_HSDF 1 "register_operand" "w")
(match_operand:VHSDF_HSDF 2 "register_operand" "w")]
UNSPEC_FMULX))]
"TARGET_SIMD"
"fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_fp_mul_<stype>")]
)
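
;; FMULX behaves like a normal floating-point multiply except that
;; zero times infinity (in either order) returns 2.0, with the sign
;; given by the operands' signs, rather than a NaN; that is the value
;; the reciprocal and reciprocal-sqrt step sequences expect.
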
;; vmulxq_lane_f32 and vmulx_laneq_f32
(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(unspec:VDQSF
[(match_operand:VDQSF 1 "register_operand" "w")
(vec_duplicate:VDQSF
(vec_select:<VEL>
(match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
UNSPEC_FMULX))]
"TARGET_SIMD"
{
operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)
;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
(define_insn "*aarch64_mulx_elt<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w")
(unspec:VDQF
[(match_operand:VDQF 1 "register_operand" "w")
(vec_duplicate:VDQF
(vec_select:<VEL>
(match_operand:VDQF 2 "register_operand" "w")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
UNSPEC_FMULX))]
"TARGET_SIMD"
{
operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)
;; vmulxq_lane
(define_insn "*aarch64_mulx_elt_from_dup<mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF
[(match_operand:VHSDF 1 "register_operand" "w")
(vec_duplicate:VHSDF
(match_operand:<VEL> 2 "register_operand" "<h_con>"))]
UNSPEC_FMULX))]
"TARGET_SIMD"
"fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
[(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
;; vmulxs_lane_f32, vmulxs_laneq_f32
;; vmulxd_lane_f64 == vmulx_lane_f64
;; vmulxd_laneq_f64 == vmulx_laneq_f64
(define_insn "*aarch64_vgetfmulx<mode>"
[(set (match_operand:<VEL> 0 "register_operand" "=w")
(unspec:<VEL>
[(match_operand:<VEL> 1 "register_operand" "w")
(vec_select:<VEL>
(match_operand:VDQF 2 "register_operand" "w")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
UNSPEC_FMULX))]
"TARGET_SIMD"
{
operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
}
[(set_attr "type" "fmul<Vetype>")]
)
;; <su>q<addsub>
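;; BINQOPS iterates over the saturating add/sub RTL codes (ss_plus,
;; us_plus, ss_minus, us_minus), so this one pattern yields SQADD, UQADD,
;; SQSUB and UQSUB.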
(define_insn "aarch64_<su_optab>q<addsub><mode>"
[(set (match_operand:VSDQ_I 0 "register_operand" "=w")
(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
(match_operand:VSDQ_I 2 "register_operand" "w")))]
"TARGET_SIMD"
"<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_q<addsub><q>")]
)
;; suqadd and usqadd
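;; These accumulate into the destination (SUQADD adds an unsigned value
;; to a signed accumulator, USQADD the converse), so operand 1 is tied to
;; operand 0 via the "0" constraint and only operand 2 is printed.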
(define_insn "aarch64_<sur>qadd<mode>"
[(set (match_operand:VSDQ_I 0 "register_operand" "=w")
(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
(match_operand:VSDQ_I 2 "register_operand" "w")]
USSUQADD))]
"TARGET_SIMD"
"<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_qadd<q>")]
)
;; sqmovn and uqmovn
(define_insn "aarch64_<su>qmovn<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
(SAT_TRUNC:<VNARROWQ>
(match_operand:SD_HSDI 1 "register_operand" "w")))]
"TARGET_SIMD"
"<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
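;; The vector forms below model the 64-bit narrowed result as a 128-bit
;; vec_concat with zero, recording that the instruction clears the high
;; half of the destination.  Separate little- and big-endian patterns are
;; needed because vec_concat operands follow memory lane order, which
;; swaps the register halves on big-endian.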
(define_insn "aarch64_<su>qmovn<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(SAT_TRUNC:<VNARROWQ>
(match_operand:VQN 1 "register_operand" "w"))
(match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
(define_insn "aarch64_<su>qmovn<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
(SAT_TRUNC:<VNARROWQ>
(match_operand:VQN 1 "register_operand" "w"))))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
(define_expand "aarch64_<su>qmovn<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand")
(SAT_TRUNC:<VNARROWQ>
(match_operand:VQN 1 "register_operand")))]
"TARGET_SIMD"
{
rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1],
CONST0_RTX (<VNARROWQ>mode)));
else
emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1],
CONST0_RTX (<VNARROWQ>mode)));
/* The intrinsic expects a narrow result, so emit a subreg that will get
optimized away as appropriate. */
emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
<VNARROWQ2>mode));
DONE;
}
)
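;; The XTN2 forms write the narrowed result to the high half of the
;; destination while preserving the low half, hence the "0" tie on
;; operand 1.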
(define_insn "aarch64_<su>qxtn2<mode>_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 1 "register_operand" "0")
(SAT_TRUNC:<VNARROWQ>
(match_operand:VQN 2 "register_operand" "w"))))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
(define_insn "aarch64_<su>qxtn2<mode>_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(SAT_TRUNC:<VNARROWQ>
(match_operand:VQN 2 "register_operand" "w"))
(match_operand:<VNARROWQ> 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
(define_expand "aarch64_<su>qxtn2<mode>"
[(match_operand:<VNARROWQ2> 0 "register_operand")
(match_operand:<VNARROWQ> 1 "register_operand")
(SAT_TRUNC:<VNARROWQ>
(match_operand:VQN 2 "register_operand"))]
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
operands[2]));
else
emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
operands[2]));
DONE;
}
)
;; sqmovun
(define_insn "aarch64_sqmovun<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
(unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")]
UNSPEC_SQXTUN))]
"TARGET_SIMD"
"sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
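;; As for <su>qmovn above, the vector SQXTUN patterns concatenate the
;; narrowed result with zero, with little- and big-endian variants to
;; match vec_concat's memory lane order.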
(define_insn "aarch64_sqmovun<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
UNSPEC_SQXTUN)
(match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
(define_insn "aarch64_sqmovun<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
UNSPEC_SQXTUN)))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
(define_expand "aarch64_sqmovun<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand")
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")]
UNSPEC_SQXTUN))]
"TARGET_SIMD"
{
rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_sqmovun<mode>_insn_be (tmp, operands[1],
CONST0_RTX (<VNARROWQ>mode)));
else
emit_insn (gen_aarch64_sqmovun<mode>_insn_le (tmp, operands[1],
CONST0_RTX (<VNARROWQ>mode)));
/* The intrinsic expects a narrow result, so emit a subreg that will get
optimized away as appropriate. */
emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
<VNARROWQ2>mode));
DONE;
}
)
(define_insn "aarch64_sqxtun2<mode>_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 1 "register_operand" "0")
(unspec:<VNARROWQ>
[(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
(define_insn "aarch64_sqxtun2<mode>_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(unspec:<VNARROWQ>
[(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)
(match_operand:<VNARROWQ> 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
(define_expand "aarch64_sqxtun2<mode>"
[(match_operand:<VNARROWQ2> 0 "register_operand")
(match_operand:<VNARROWQ> 1 "register_operand")
(unspec:<VNARROWQ>
[(match_operand:VQN 2 "register_operand")] UNSPEC_SQXTUN)]
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
operands[2]));
else
emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
operands[2]));
DONE;
}
)
;; <su>q<absneg>
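;; Only the signed forms exist here: UNQOPS covers ss_abs and ss_neg,
;; giving SQABS and SQNEG.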
(define_insn "aarch64_s<optab><mode>"
[(set (match_operand:VSDQ_I 0 "register_operand" "=w")
(UNQOPS:VSDQ_I
(match_operand:VSDQ_I 1 "register_operand" "w")))]
"TARGET_SIMD"
"s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_<optab><q>")]
)
;; sq<r>dmulh.
(define_insn "aarch64_sq<r>dmulh<mode>"
[(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
(unspec:VSDQ_HSI
[(match_operand:VSDQ_HSI 1 "register_operand" "w")
(match_operand:VSDQ_HSI 2 "register_operand" "w")]
VQDMULH))]
"TARGET_SIMD"
"sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)
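;; The _n variants broadcast a scalar operand with vec_duplicate and map
;; to the by-element form of the instruction with lane index 0.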
(define_insn "aarch64_sq<r>dmulh_n<mode>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "w")
(vec_duplicate:VDQHS
(match_operand:<VEL> 2 "register_operand" "<h_con>"))]
VQDMULH))]
"TARGET_SIMD"
"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
;; sq<r>dmulh_lane
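;; The "<vwx>" constraint resolves to "x" for 16-bit elements because the
;; by-element encoding of SQDMULH/SQRDMULH can only address V0-V15 for H
;; lanes; the lane index is remapped for endianness at output time, as
;; above.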
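;; sq<r>dmulh_lane.  Lane-indexed signed saturating doubling multiply
;; high (<r> covers both the plain sqdmulh and the rounding sqrdmulh
;; forms, selected through the VQDMULH unspec iterator).  The lane index
;; in operand 3 is bounds-checked when the builtin is expanded, so the
;; patterns themselves only remap it with aarch64_endian_lane_rtx to
;; recover the architectural lane number on big-endian targets.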
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "w")
(vec_select:<VEL>
(match_operand:<VCOND> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
VQDMULH))]
"TARGET_SIMD"
"*
operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "w")
(vec_select:<VEL>
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
VQDMULH))]
"TARGET_SIMD"
"*
operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
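;; An illustrative sketch of how these patterns are reached from
;; arm_neon.h (the register names in the comment show one possible
;; allocation, not a fixed mapping):
;;
;;   #include <arm_neon.h>
;;   int16x4_t
;;   mulh_lane (int16x4_t a, int16x4_t v)
;;   {
;;     return vqdmulh_lane_s16 (a, v, 1); /* sqdmulh v0.4h, v1.4h, v2.h[1] */
;;   }
;;
;; The _laneq variants are identical except that the lane is taken from
;; a 128-bit vector, e.g. vqdmulh_laneq_s16 with an int16x8_t argument.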
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
(unspec:SD_HSI
[(match_operand:SD_HSI 1 "register_operand" "w")
(vec_select:<VEL>
(match_operand:<VCOND> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
VQDMULH))]
"TARGET_SIMD"
"*
operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
(unspec:SD_HSI
[(match_operand:SD_HSI 1 "register_operand" "w")
(vec_select:<VEL>
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
VQDMULH))]
"TARGET_SIMD"
"*
operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
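;; The SD_HSI variants above are the scalar forms of the same
;; operations: operands 0 and 1 use the H or S register views, and only
;; the multiplier lane comes from a vector.  They back intrinsics such
;; as vqdmulhh_lane_s16 and vqrdmulhs_lane_s32.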
;; sqrdml[as]h.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
[(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
(unspec:VSDQ_HSI
[(match_operand:VSDQ_HSI 1 "register_operand" "0")
(match_operand:VSDQ_HSI 2 "register_operand" "w")
(match_operand:VSDQ_HSI 3 "register_operand" "w")]
SQRDMLH_AS))]
"TARGET_SIMD_RDMA"
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
[(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
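;; SQRDMLH_AS iterates over the ARMv8.1-A rounding doubling
;; multiply-accumulate instructions sqrdmlah and sqrdmlsh, hence the
;; TARGET_SIMD_RDMA gate; the "0" constraint ties the accumulator
;; (operand 1) to the destination.  A usage sketch, assuming a toolchain
;; with +rdma enabled (the function name is a placeholder):
;;
;;   #include <arm_neon.h>
;;   int16x4_t
;;   rdmlah (int16x4_t acc, int16x4_t a, int16x4_t b)
;;   {
;;     return vqrdmlah_s16 (acc, a, b); /* sqrdmlah v0.4h, v1.4h, v2.4h */
;;   }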
;; sqrdml[as]h_lane.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "0")
(match_operand:VDQHS 2 "register_operand" "w")
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
SQRDMLH_AS))]
"TARGET_SIMD_RDMA"
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
(unspec:SD_HSI
[(match_operand:SD_HSI 1 "register_operand" "0")
(match_operand:SD_HSI 2 "register_operand" "w")
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
SQRDMLH_AS))]
"TARGET_SIMD_RDMA"
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
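;; As with sq<r>dmulh, the _lane patterns take the multiplier element
;; from a 64-bit vector (<VCOND> mode); the lane index in operand 4 is
;; validated at builtin-expansion time and only endian-remapped here.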
;; sqrdml[as]h_laneq.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "0")
(match_operand:VDQHS 2 "register_operand" "w")
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
SQRDMLH_AS))]
"TARGET_SIMD_RDMA"
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
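
;; For illustration only (not from the original sources): assuming the
;; standard ACLE intrinsics in arm_neon.h, a use such as
;;
;;   #include <arm_neon.h>
;;   int16x8_t
;;   f (int16x8_t acc, int16x8_t x, int16x8_t v)
;;   {
;;     return vqrdmlahq_laneq_s16 (acc, x, v, 7);
;;   }
;;
;; compiled with the RDMA extension enabled (e.g. -march=armv8.1-a) is
;; expected to match the VDQHS pattern above and emit something like
;;   sqrdmlah v0.8h, v1.8h, v2.h[7]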
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
(unspec:SD_HSI
[(match_operand:SD_HSI 1 "register_operand" "0")
(match_operand:SD_HSI 2 "register_operand" "w")
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
SQRDMLH_AS))]
"TARGET_SIMD_RDMA"
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
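
;; Similarly for the scalar SD_HSI form, a sketch assuming ACLE's
;; vqrdmlahh_laneq_s16:
;;
;;   int16_t
;;   g (int16_t acc, int16_t x, int16x8_t v)
;;   {
;;     return vqrdmlahh_laneq_s16 (acc, x, v, 3);
;;   }
;;
;; should match this pattern and emit something like
;;   sqrdmlah h0, h1, v2.h[3]
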
;; vqdml[sa]l
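;; Note: in the ss_plus form below the accumulator (operand 1) must be
;; the second arm of the plus, since that is the canonical RTL ordering;
;; combine only generates the canonical form, so a non-canonical ordering
;; would never be matched.  The ss_minus form is not commutative, so the
;; accumulator appears first there.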
(define_insn "aarch64_sqdmlal<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VSD_HSI 2 "register_operand" "w"))
(sign_extend:<VWIDE>
(match_operand:VSD_HSI 3 "register_operand" "w")))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
"sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
[(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
(define_insn "aarch64_sqdmlsl<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VSD_HSI 2 "register_operand" "w"))
(sign_extend:<VWIDE>
(match_operand:VSD_HSI 3 "register_operand" "w")))
(const_int 1))))]
"TARGET_SIMD"
"sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
[(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
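
;; As a hedged illustration (again assuming the usual arm_neon.h
;; intrinsics):
;;
;;   int32x4_t
;;   h (int32x4_t acc, int16x4_t x, int16x4_t y)
;;   {
;;     return vqdmlal_s16 (acc, x, y);   /* acc + sat (2 * x * y), widened */
;;   }
;;
;; should match the ss_plus pattern above (instantiated as
;; aarch64_sqdmlalv4hi) and emit something like
;;   sqdmlal v0.4s, v1.4h, v2.4h
;; vqdmlsl_s16 maps onto the ss_minus pattern in the same way.
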
;; vqdml[sa]l_lane
(define_insn "aarch64_sqdmlal_lane<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
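
;; A sketch under the same ACLE assumptions, this time for the lane form:
;;
;;   int32x4_t
;;   f (int32x4_t acc, int16x4_t x, int16x4_t v)
;;   {
;;     return vqdmlal_lane_s16 (acc, x, v, 1);
;;   }
;;
;; should match the pattern above and emit something like
;;   sqdmlal v0.4s, v1.4h, v2.h[1]
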
(define_insn "aarch64_sqdmlsl_lane<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
))
(const_int 1))))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
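
;; The _laneq variants below take the multiplier lane from a full
;; 128-bit vector: operand 3 has the <VCONQ> mode, the q-register
;; container of the element type, so the lane index may range over all
;; elements of a quad vector.  A minimal usage sketch, assuming the
;; standard arm_neon.h mapping for the V4HI case (the lane index 7 is
;; purely illustrative):
;;
;;   int32x4_t f (int32x4_t acc, int16x4_t a, int16x8_t b)
;;   {
;;     /* Expected to emit: sqdmlsl v0.4s, v1.4h, v2.h[7]  */
;;     return vqdmlsl_laneq_s16 (acc, a, b, 7);
;;   }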
(define_insn "aarch64_sqdmlsl_laneq<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
))
(const_int 1))))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
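
;; In the adding (ss_plus) form below, the accumulator, operand 1, sits
;; in the second arm of the saturating addition.  Canonical RTL orders
;; a plus with the more complex ss_ashift expression first, and passes
;; such as combine only generate the canonical form, so writing the
;; pattern this way is what lets it match.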
(define_insn "aarch64_sqdmlal_laneq<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
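
;; The scalar (SD_HSI) forms follow: operand 2 is a single H or S
;; register and operand 3 is a 64-bit <VCOND> vector supplying the
;; lane.  A minimal usage sketch, assuming the standard arm_neon.h
;; mapping for the HI case:
;;
;;   int32_t g (int32_t acc, int16_t a, int16x4_t b)
;;   {
;;     /* Expected to emit: sqdmlal s0, h1, v2.h[3]  */
;;     return vqdmlalh_lane_s16 (acc, a, b, 3);
;;   }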
(define_insn "aarch64_sqdmlal_lane<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:SD_HSI 2 "register_operand" "w"))
(sign_extend:<VWIDE>
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
)
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
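
;; All of these lane patterns remap operand 4 through
;; aarch64_endian_lane_rtx before the instruction is printed: GCC
;; numbers vector elements in memory order, so the architectural lane
;; index is flipped on big-endian targets and left unchanged on
;; little-endian ones.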
(define_insn "aarch64_sqdmlsl_lane<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:SD_HSI 2 "register_operand" "w"))
(sign_extend:<VWIDE>
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
)
(const_int 1))))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
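;; The aarch64_endian_lane_rtx calls above implement the usual lane fixup:
;; GCC numbers vector elements from the least significant end, so on
;; big-endian targets the architectural lane printed in "[%4]" must be
;; flipped.  A minimal sketch of the assumed effect, for a V4HI container:
;;
;;   /* Lane 1 stays 1 on little-endian; becomes 4 - 1 - 1 = 2 on BE.  */
;;   operands[4] = aarch64_endian_lane_rtx (V4HImode, 1);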
(define_insn "aarch64_sqdmlal_laneq<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:SD_HSI 2 "register_operand" "w"))
(sign_extend:<VWIDE>
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
)
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
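;; _laneq differs from _lane only in operand 3's container: <VCONQ> is the
;; 128-bit mode, so the HImode instance indexes lanes 0-7.  Illustrative
;; sketch (intrinsic from arm_neon.h; names otherwise hypothetical):
;;
;;   #include <arm_neon.h>
;;   int32_t g (int32_t acc, int16_t b, int16x8_t v)
;;   {
;;     return vqdmlalh_laneq_s16 (acc, b, v, 7); /* sqdmlal s0, h1, v2.h[7] */
;;   }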
(define_insn "aarch64_sqdmlsl_laneq<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:SD_HSI 2 "register_operand" "w"))
(sign_extend:<VWIDE>
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
)
(const_int 1))))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
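;; Note the two accumulate shapes used above: the saturating add puts the
;; accumulator in the second arm, the canonical RTL form for a plus whose
;; other arm is a more complex expression, while the saturating subtract
;; necessarily keeps it first:
;;
;;   (ss_plus  (ss_ashift (mult ...) (const_int 1)) (match_operand 1))  ; sqdmlal
;;   (ss_minus (match_operand 1) (ss_ashift (mult ...) (const_int 1)))  ; sqdmlsl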
;; vqdml[sa]l_n
(define_insn "aarch64_sqdmlsl_n<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(match_operand:<VEL> 3 "register_operand" "<vwx>"))))
(const_int 1))))]
"TARGET_SIMD"
"sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
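;; Note the operand order above: in the saturating-subtract (sqdmlsl)
;; patterns the accumulator (operand 1) is the first arm of the ss_minus,
;; whereas in the saturating-add (sqdmlal) patterns it is the second arm
;; of the ss_plus.  The latter is the canonical RTL form for plus, and
;; using it lets combine recognise the pattern.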
(define_insn "aarch64_sqdmlal_n<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(match_operand:<VEL> 3 "register_operand" "<vwx>"))))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
"sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
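;; Both _n patterns multiply each element of operand 2 by one scalar
;; (operand 3), modelled as a vec_duplicate of a sign_extend.  A minimal
;; sketch of how these are normally reached from C, assuming the usual
;; arm_neon.h mapping of the vqdml[as]l_n intrinsics onto these insns:
;;
;;   #include <arm_neon.h>
;;
;;   int32x4_t
;;   mla_n (int32x4_t acc, int16x4_t v, int16_t s)
;;   {
;;     /* acc[i] = ssat (acc[i] + ssat (2 * v[i] * s))
;;        => sqdmlal v0.4s, v1.4h, v2.h[0]  */
;;     return vqdmlal_n_s16 (acc, v, s);
;;   }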
;; sqdml[as]l2
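;; The "2" (high-half) variants widen the upper <VHALF> lanes of two
;; 128-bit sources: the vec_select with a vect_par_cnst_hi_half parallel
;; extracts the high lanes, and the arithmetic then matches the plain
;; sqdml[as]l patterns, roughly
;; dst = ssat (dst +/- ssat (2 * hi (op2) * hi (op3))).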
(define_insn "aarch64_sqdmlal2<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 3 "register_operand" "w")
(match_dup 4))))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
"sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
(define_insn "aarch64_sqdmlsl2<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 3 "register_operand" "w")
(match_dup 4))))
(const_int 1))))]
"TARGET_SIMD"
"sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
(define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(SBINQOPS:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand")
(match_dup 1))
(match_operand:VQ_HSI 2 "register_operand")
(match_operand:VQ_HSI 3 "register_operand")]
AArch64 [3/10] 2012-10-23 Ian Bolton <ian.bolton@arm.com> James Greenhalgh <james.greenhalgh@arm.com> Jim MacArthur <jim.macarthur@arm.com> Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Marcus Shawcroft <marcus.shawcroft@arm.com> Nigel Stephens <nigel.stephens@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Richard Earnshaw <rearnsha@arm.com> Sofiane Naci <sofiane.naci@arm.com> Stephen Thomas <stephen.thomas@arm.com> Tejas Belagod <tejas.belagod@arm.com> Yufeng Zhang <yufeng.zhang@arm.com> * common/config/aarch64/aarch64-common.c: New file. * config/aarch64/aarch64-arches.def: New file. * config/aarch64/aarch64-builtins.c: New file. * config/aarch64/aarch64-cores.def: New file. * config/aarch64/aarch64-elf-raw.h: New file. * config/aarch64/aarch64-elf.h: New file. * config/aarch64/aarch64-generic.md: New file. * config/aarch64/aarch64-linux.h: New file. * config/aarch64/aarch64-modes.def: New file. * config/aarch64/aarch64-option-extensions.def: New file. * config/aarch64/aarch64-opts.h: New file. * config/aarch64/aarch64-protos.h: New file. * config/aarch64/aarch64-simd.md: New file. * config/aarch64/aarch64-tune.md: New file. * config/aarch64/aarch64.c: New file. * config/aarch64/aarch64.h: New file. * config/aarch64/aarch64.md: New file. * config/aarch64/aarch64.opt: New file. * config/aarch64/arm_neon.h: New file. * config/aarch64/constraints.md: New file. * config/aarch64/gentune.sh: New file. * config/aarch64/iterators.md: New file. * config/aarch64/large.md: New file. * config/aarch64/predicates.md: New file. * config/aarch64/small.md: New file. * config/aarch64/sync.md: New file. * config/aarch64/t-aarch64-linux: New file. * config/aarch64/t-aarch64: New file. Co-Authored-By: Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com> Co-Authored-By: Jim MacArthur <jim.macarthur@arm.com> Co-Authored-By: Marcus Shawcroft <marcus.shawcroft@arm.com> Co-Authored-By: Nigel Stephens <nigel.stephens@arm.com> Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Co-Authored-By: Richard Earnshaw <rearnsha@arm.com> Co-Authored-By: Sofiane Naci <sofiane.naci@arm.com> Co-Authored-By: Stephen Thomas <stephen.thomas@arm.com> Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com> Co-Authored-By: Yufeng Zhang <yufeng.zhang@arm.com> From-SVN: r192723
2012-10-23 19:02:30 +02:00
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[0],
operands[1], operands[2],
operands[3], p));
DONE;
})
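;; A sketch of what the expander above generates, assuming little-endian
;; and V8HImode (so <nunits> is 8): aarch64_simd_vect_par_cnst_half with
;; high = true yields the lane-selection parallel
;;
;;   (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])
;;
;; which is passed as operand 4 of the _internal insn, making both
;; vec_selects read the upper four halfword lanes.  (On big-endian the
;; helper swaps which half "high" refers to.)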
;; vqdml[sa]l2_lane
(define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
))))
(const_int 1))))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
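
;; The saturating-add counterpart of the sqdmlsl2_lane pattern above: the
;; high half of operand 2 is multiplied by the element of the 64-bit
;; <VCOND> vector (operand 3) selected by operand 4, doubled with a
;; saturating shift, and saturating-added to the accumulator in operand 1.
;; aarch64_endian_lane_rtx remaps the lane index so that big-endian
;; targets address the same architectural element.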
(define_insn "aarch64_sqdmlal2_lane<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
))))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
"sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
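
;; The _laneq variants below are identical except that the multiplier
;; element comes from a full 128-bit <VCONQ> vector, so the lane index is
;; remapped in <VCONQ>mode.  These back the vqdml{a,s}l_high_laneq_*
;; intrinsics in arm_neon.h.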
(define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
))))
(const_int 1))))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
(define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
))))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
"sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
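
;; The expanders below supply the missing pieces for the *_internal insns:
;; SBINQOPS iterates over ss_plus/ss_minus to produce the sqdmlal2/sqdmlsl2
;; forms from one template, and aarch64_simd_vect_par_cnst_half builds the
;; vect_par_cnst_hi_half parallel that selects the high half of operand 2
;; (illustratively, (parallel [(const_int 4) ... (const_int 7)]) for V8HI
;; on little-endian).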
(define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(SBINQOPS:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand")
(match_dup 1))
(match_operand:VQ_HSI 2 "register_operand")
(match_operand:<VCOND> 3 "register_operand")
(match_operand:SI 4 "immediate_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[0],
operands[1], operands[2],
operands[3], operands[4], p));
DONE;
})
(define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(SBINQOPS:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand")
(match_dup 1))
(match_operand:VQ_HSI 2 "register_operand")
(match_operand:<VCONQ> 3 "register_operand")
(match_operand:SI 4 "immediate_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[0],
operands[1], operands[2],
operands[3], operands[4], p));
DONE;
})
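
;; The _n variants multiply the high half of operand 2 by a single scalar
;; (operand 3) duplicated across all lanes; the output template addresses
;; it as element [0] of the register holding the scalar.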
(define_insn "aarch64_sqdmlsl2_n<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
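
;; This _internal pair implements SQDMLSL2 (above) and SQDMLAL2 (below) with
;; a scalar multiplier: operand 4 is a lane parallel selecting the high half
;; of operand 2, which is sign-extended and multiplied by scalar operand 3
;; broadcast via vec_duplicate; the saturating-doubled product is then
;; subtracted from (SQDMLSL2) or added to (SQDMLAL2) operand 1, e.g.
;;   sqdmlal2 v0.4s, v1.8h, v2.h[0]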
(define_insn "aarch64_sqdmlal2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
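
;; The expander builds the high-half lane parallel (operand 4 of the
;; _internal patterns above) at expand time with
;; aarch64_simd_vect_par_cnst_half and forwards everything else unchanged.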
(define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (SBINQOPS:<VWIDE>
     (match_operand:<VWIDE> 1 "register_operand")
     (match_dup 1))
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VEL> 3 "register_operand")]
"TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[0],
                                                                operands[1],
                                                                operands[2],
                                                                operands[3],
                                                                p));
  DONE;
})
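
;; A rough sketch of what the expander produces, assuming V8HImode on a
;; little-endian target: aarch64_simd_vect_par_cnst_half (V8HImode, 8, true)
;; yields
;;   (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])
;; which vect_par_cnst_hi_half accepts as the architectural high half; on
;; big-endian the lane numbers are flipped to GCC's view of the same half.
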
;; vqdmull
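;; Signed saturating doubling multiply long: each double-width result lane
;; is ssat (2 * a[i] * b[i]).  The only inputs that saturate are both
;; multiplicands equal to the most negative representable value, e.g.
;;   sqdmull v0.4s, v1.4h, v2.4h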
(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VSD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (match_operand:VSD_HSI 2 "register_operand" "w")))
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)
;; vqdmull_lane
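;; The _lane form takes the multiplier from one element of a 64-bit vector,
;; the _laneq form from a 128-bit vector.  Operand 3 is the lane number; it
;; is bounds-checked when the builtin is expanded and flipped for big-endian
;; targets with aarch64_endian_lane_rtx before the instruction is printed.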
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VD_HSI 1 "register_operand" "w"))
            (vec_duplicate:<VWIDE>
              (sign_extend:<VWIDE_S>
                (vec_select:<VEL>
                  (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
            ))
          (const_int 1)))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
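
;; As _lane above, but the scalar comes from a 128-bit vector, doubling the
;; valid lane-number range.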
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 1 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
))
(const_int 1)))]
"TARGET_SIMD"
{
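    /* The lane number in the RTL uses GCC's numbering; flip it to the
       architectural lane number (a no-op on little-endian) before it is
       printed as %3.  */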
operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
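
;; Scalar (SD_HSI) form of SQDMULL by element: the multiplier is a single
;; lane selected from a 64-bit vector, as used by the vqdmullh_lane_s16 and
;; vqdmulls_lane_s32 intrinsics.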
(define_insn "aarch64_sqdmull_lane<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:SD_HSI 1 "register_operand" "w"))
(sign_extend:<VWIDE>
(vec_select:<VEL>
(match_operand:<VCOND> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))
))
(const_int 1)))]
"TARGET_SIMD"
{
operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
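
;; Scalar (SD_HSI) form of SQDMULL by element, with the lane selected from
;; a 128-bit vector (the vqdmullh_laneq_s16 and vqdmulls_laneq_s32
;; intrinsics).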
(define_insn "aarch64_sqdmull_laneq<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:SD_HSI 1 "register_operand" "w"))
(sign_extend:<VWIDE>
(vec_select:<VEL>
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))
))
(const_int 1)))]
"TARGET_SIMD"
{
operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull_n
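;; Multiply each element of operand 1 by the scalar in operand 2
;; (broadcast by vec_duplicate), doubling and saturating the
;; double-width product; the (ss_ashift ... (const_int 1)) wrapper is
;; the RTL spelling of that saturating doubling.
;;
;; A minimal scalar sketch of the per-element semantics, assuming
;; 16-bit elements (illustrative C; the helper name is made up and is
;; not part of GCC):
;;
;;   #include <stdint.h>
;;   int32_t sqdmull_1 (int16_t a, int16_t b)
;;   {
;;     int64_t p = 2 * (int64_t) a * (int64_t) b;  /* doubled product */
;;     if (p > INT32_MAX) return INT32_MAX;        /* saturate high */
;;     if (p < INT32_MIN) return INT32_MIN;        /* saturate low */
;;     return (int32_t) p;
;;   }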
(define_insn "aarch64_sqdmull_n<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 1 "register_operand" "w"))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(match_operand:<VEL> 2 "register_operand" "<vwx>")))
)
(const_int 1)))]
"TARGET_SIMD"
"sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull2
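;; The "2" (high-half) variants: vect_par_cnst_hi_half supplies the
;; lane indices of the upper half of each 128-bit input, so the
;; widening saturating doubling multiply consumes the high halves of
;; operands 1 and 2.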
(define_insn "aarch64_sqdmull2<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 1 "register_operand" "w")
(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 2 "register_operand" "w")
(match_dup 3)))
)
(const_int 1)))]
"TARGET_SIMD"
"sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
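;; The expander only has to materialise the high-half lane selector
;; (via aarch64_simd_vect_par_cnst_half) and forward it to the
;; _internal pattern above.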
(define_expand "aarch64_sqdmull2<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:VQ_HSI 1 "register_operand")
(match_operand:VQ_HSI 2 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
operands[2], p));
DONE;
})

;; vqdmull2_lane
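;; Lane variants: the high half of operand 1 is multiplied by the
;; single element of operand 2 picked out by immediate operand 3.
;; aarch64_endian_lane_rtx converts GCC's lane numbering to the
;; architectural lane number, which differs on big-endian targets.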
(define_insn "aarch64_sqdmull2_lane<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_ashift:<VWIDE>
AArch64 [3/10] 2012-10-23 Ian Bolton <ian.bolton@arm.com> James Greenhalgh <james.greenhalgh@arm.com> Jim MacArthur <jim.macarthur@arm.com> Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Marcus Shawcroft <marcus.shawcroft@arm.com> Nigel Stephens <nigel.stephens@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Richard Earnshaw <rearnsha@arm.com> Sofiane Naci <sofiane.naci@arm.com> Stephen Thomas <stephen.thomas@arm.com> Tejas Belagod <tejas.belagod@arm.com> Yufeng Zhang <yufeng.zhang@arm.com> * common/config/aarch64/aarch64-common.c: New file. * config/aarch64/aarch64-arches.def: New file. * config/aarch64/aarch64-builtins.c: New file. * config/aarch64/aarch64-cores.def: New file. * config/aarch64/aarch64-elf-raw.h: New file. * config/aarch64/aarch64-elf.h: New file. * config/aarch64/aarch64-generic.md: New file. * config/aarch64/aarch64-linux.h: New file. * config/aarch64/aarch64-modes.def: New file. * config/aarch64/aarch64-option-extensions.def: New file. * config/aarch64/aarch64-opts.h: New file. * config/aarch64/aarch64-protos.h: New file. * config/aarch64/aarch64-simd.md: New file. * config/aarch64/aarch64-tune.md: New file. * config/aarch64/aarch64.c: New file. * config/aarch64/aarch64.h: New file. * config/aarch64/aarch64.md: New file. * config/aarch64/aarch64.opt: New file. * config/aarch64/arm_neon.h: New file. * config/aarch64/constraints.md: New file. * config/aarch64/gentune.sh: New file. * config/aarch64/iterators.md: New file. * config/aarch64/large.md: New file. * config/aarch64/predicates.md: New file. * config/aarch64/small.md: New file. * config/aarch64/sync.md: New file. * config/aarch64/t-aarch64-linux: New file. * config/aarch64/t-aarch64: New file. Co-Authored-By: Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com> Co-Authored-By: Jim MacArthur <jim.macarthur@arm.com> Co-Authored-By: Marcus Shawcroft <marcus.shawcroft@arm.com> Co-Authored-By: Nigel Stephens <nigel.stephens@arm.com> Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Co-Authored-By: Richard Earnshaw <rearnsha@arm.com> Co-Authored-By: Sofiane Naci <sofiane.naci@arm.com> Co-Authored-By: Stephen Thomas <stephen.thomas@arm.com> Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com> Co-Authored-By: Yufeng Zhang <yufeng.zhang@arm.com> From-SVN: r192723
2012-10-23 19:02:30 +02:00
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 1 "register_operand" "w")
(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCOND> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
))
(const_int 1)))]
"TARGET_SIMD"
{
operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
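;; The _laneq form is identical except that the indexed operand 2 has
;; the 128-bit <VCONQ> mode rather than the 64-bit <VCOND> mode used
;; by the _lane form above.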
(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 1 "register_operand" "w")
(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
))
(const_int 1)))]
"TARGET_SIMD"
{
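/* Remap the lane number for big-endian targets, where GCC's lane
   numbering is reversed with respect to the architecture's.  */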
operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
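;; The sqdmull2_lane and sqdmull2_laneq expanders build a parallel selecting
;; the high half of operand 1 and defer to the _internal patterns above,
;; which match the vec_select form directly.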
(define_expand "aarch64_sqdmull2_lane<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:VQ_HSI 1 "register_operand")
(match_operand:<VCOND> 2 "register_operand")
(match_operand:SI 3 "immediate_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
p));
DONE;
})
(define_expand "aarch64_sqdmull2_laneq<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:VQ_HSI 1 "register_operand")
(match_operand:<VCONQ> 2 "register_operand")
(match_operand:SI 3 "immediate_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
p));
DONE;
})
;; vqdmull2_n
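;; As for vqdmull2_lane, but the multiplier is a scalar broadcast to all
;; lanes with vec_duplicate rather than a selected vector lane.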
(define_insn "aarch64_sqdmull2_n<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
(match_operand:VQ_HSI 1 "register_operand" "w")
(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
(vec_duplicate:<VWIDE>
(sign_extend:<VWIDE_S>
(match_operand:<VEL> 2 "register_operand" "<vwx>")))
)
(const_int 1)))]
"TARGET_SIMD"
"sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
(define_expand "aarch64_sqdmull2_n<mode>"
[(match_operand:<VWIDE> 0 "register_operand")
(match_operand:VQ_HSI 1 "register_operand")
(match_operand:<VEL> 2 "register_operand")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
;; vshl
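;; Shift by a per-lane count held in a register: sshl, ushl and the
;; rounding forms srshl, urshl.  The count is signed; a negative count
;; shifts right.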
(define_insn "aarch64_<sur>shl<mode>"
[(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
(unspec:VSDQ_I_DI
[(match_operand:VSDQ_I_DI 1 "register_operand" "w")
(match_operand:VSDQ_I_DI 2 "register_operand" "w")]
VSHL))]
"TARGET_SIMD"
"<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
[(set_attr "type" "neon_shift_reg<q>")]
)
;; vqshl
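;; As vshl, but the result saturates: sqshl, uqshl and the rounding
;; forms sqrshl, uqrshl.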
(define_insn "aarch64_<sur>q<r>shl<mode>"
[(set (match_operand:VSDQ_I 0 "register_operand" "=w")
(unspec:VSDQ_I
[(match_operand:VSDQ_I 1 "register_operand" "w")
(match_operand:VSDQ_I 2 "register_operand" "w")]
VQSHL))]
"TARGET_SIMD"
"<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
[(set_attr "type" "neon_sat_shift_reg<q>")]
)
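;; Standard vec_widen_<su>shiftl_{lo,hi} patterns, used by the vectorizer.
;; Each selects the relevant half of operand 1 and defers to the
;; shll/shll2 internal patterns below.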
(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
(match_operand:SI 2
"aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
VSHLL))]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
p, operands[2]));
DONE;
}
)
(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand")
(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
(match_operand:SI 2
"immediate_operand" "i")]
VSHLL))]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
p, operands[2]));
DONE;
}
)
;; vshll_n
(define_insn "aarch64_<sur>shll<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(unspec:<VWIDE> [(vec_select:<VHALF>
(match_operand:VQW 1 "register_operand" "w")
(match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
(match_operand:SI 3
"aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
VSHLL))]
"TARGET_SIMD"
{
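/* A left shift equal to the element width has its own encoding, SHLL;
   the [us]shll forms only accept immediates smaller than that.  */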
if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
else
return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
}
[(set_attr "type" "neon_shift_imm_long")]
)
(define_insn "aarch64_<sur>shll2<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(unspec:<VWIDE> [(vec_select:<VHALF>
(match_operand:VQW 1 "register_operand" "w")
(match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
(match_operand:SI 3
"aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
VSHLL))]
"TARGET_SIMD"
{
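/* As for the low-half pattern above: a shift by the full element width
   must use the SHLL2 encoding.  */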
if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
else
return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
}
[(set_attr "type" "neon_shift_imm_long")]
)
(define_insn "aarch64_<sur>shll_n<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
(match_operand:SI 2
"aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
VSHLL))]
"TARGET_SIMD"
{
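/* A shift by the element width is only available as SHLL.  */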
if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
else
return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
}
[(set_attr "type" "neon_shift_imm_long")]
)
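
;; SHLL only accepts a shift count equal to the source element width;
;; smaller counts must use SSHLL/USHLL, which is why the templates here
;; and in the shll2 pattern below pick the mnemonic from INTVAL at
;; output time.  Illustrative mapping (builtin names as in arm_neon.h):
;;   vshll_n_s8 (x, 8) -> shll  v0.8h, v1.8b, #8
;;   vshll_n_s8 (x, 3) -> sshll v0.8h, v1.8b, #3
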
;; vshll_high_n
(define_insn "aarch64_<sur>shll2_n<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
(match_operand:SI 2 "immediate_operand" "i")]
VSHLL))]
"TARGET_SIMD"
{
if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
else
return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
}
[(set_attr "type" "neon_shift_imm_long")]
)
;; vrshr_n
(define_insn "aarch64_<sur>shr_n<mode>"
[(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
(match_operand:SI 2
"aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
VRSHR_N))]
"TARGET_SIMD"
"<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
[(set_attr "type" "neon_sat_shift_imm<q>")]
)
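
;; SRSHR/URSHR round the shifted result to nearest rather than
;; truncating.  Illustrative mapping (builtin names as in arm_neon.h):
;;   vrshr_n_s32 (x, 2) -> srshr v0.2s, v1.2s, #2
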
;; v(r)sra_n
(define_insn "aarch64_<sur>sra_n<mode>"
[(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
(match_operand:VSDQ_I_DI 2 "register_operand" "w")
(match_operand:SI 3
"aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
VSRA))]
"TARGET_SIMD"
"<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
[(set_attr "type" "neon_shift_acc<q>")]
)
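
;; The accumulating shifts SSRA/USRA/SRSRA/URSRA are destructive, so
;; operand 1 (the accumulator) is tied to the output by the "0"
;; constraint.  Illustrative mapping, with A already in v0:
;;   vsra_n_s32 (a, b, 2) -> ssra v0.2s, v1.2s, #2
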
;; vs<lr>i_n
(define_insn "aarch64_<sur>s<lr>i_n<mode>"
[(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
(match_operand:VSDQ_I_DI 2 "register_operand" "w")
(match_operand:SI 3
"aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
VSLRI))]
"TARGET_SIMD"
"s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
[(set_attr "type" "neon_shift_imm<q>")]
)
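
;; SLI/SRI shift operand 2 and insert the result into the destination,
;; leaving the bits outside the shifted range untouched, so the old
;; destination value (operand 1) is likewise tied to operand 0.
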
;; vqshl(u)
(define_insn "aarch64_<sur>qshl<u>_n<mode>"
[(set (match_operand:VSDQ_I 0 "register_operand" "=w")
(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
(match_operand:SI 2
"aarch64_simd_shift_imm_<ve_mode>" "i")]
VQSHL_N))]
"TARGET_SIMD"
"<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
[(set_attr "type" "neon_sat_shift_imm<q>")]
)
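
;; Saturating shift left by immediate: SQSHL, UQSHL and SQSHLU (signed
;; input, unsigned saturation), selected by the <sur> and <u> iterators.
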
;; vq(r)shr(u)n_n
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
(unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")
(match_operand:SI 2
"aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
VQSHRN_N))]
"TARGET_SIMD"
"<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(unspec:<VNARROWQ>
[(match_operand:VQN 1 "register_operand" "w")
(match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
VQSHRN_N)
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
(unspec:<VNARROWQ>
[(match_operand:VQN 1 "register_operand" "w")
(match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
VQSHRN_N)))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_expand "aarch64_<sur>q<r>shr<u>n_n<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
(match_operand:SI 2
"aarch64_simd_shift_imm_offset_<ve_mode>")]
VQSHRN_N))]
"TARGET_SIMD"
{
operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
INTVAL (operands[2]));
rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
else
emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
/* The intrinsic expects a narrow result, so emit a subreg that will get
optimized away as appropriate. */
emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
<VNARROWQ2>mode));
DONE;
}
)
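
;; The _insn_le/_insn_be patterns above describe the whole 128-bit write
;; that the narrowing instruction performs: writing the D register
;; implicitly zeroes the other half of the Q register, and modelling
;; that as a vec_concat with zero lets later passes fold an explicit
;; combine-with-zero into the single instruction.
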
(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(match_operand:<VNARROWQ> 1 "register_operand" "0")
(unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
(match_operand:VQN 3
"aarch64_simd_shift_imm_vec_<vn_mode>")]
VQSHRN_N)))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
(unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
(match_operand:VQN 3
"aarch64_simd_shift_imm_vec_<vn_mode>")]
VQSHRN_N)
(match_operand:<VNARROWQ> 1 "register_operand" "0")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
(define_expand "aarch64_<sur>q<r>shr<u>n2_n<mode>"
[(match_operand:<VNARROWQ2> 0 "register_operand")
(match_operand:<VNARROWQ> 1 "register_operand")
(unspec:<VNARROWQ>
[(match_operand:VQN 2 "register_operand")
(match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
VQSHRN_N)]
"TARGET_SIMD"
{
operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
INTVAL (operands[3]));
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be (operands[0],
operands[1], operands[2], operands[3]));
else
emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le (operands[0],
operands[1], operands[2], operands[3]));
DONE;
}
)
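
;; Illustrative use of the high-half form (builtin names as in
;; arm_neon.h), with LO already in v0:
;;   vqshrn_high_n_s16 (lo, x, 4) -> sqshrn2 v0.16b, v1.8h, #4
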
;; cm(eq|ge|gt|lt|le)
;; Note, we have constraints for Dz and Z as different expanders
;; have different ideas of what should be passed to this pattern.
(define_insn "aarch64_cm<optab><mode>"
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
(neg:<V_INT_EQUIV>
(COMPARISONS:<V_INT_EQUIV>
(match_operand:VDQ_I 1 "register_operand" "w,w")
(match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
)))]
"TARGET_SIMD"
"@
cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
[(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)
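
;; The (neg ...) wrapper models the architectural result: the comparison
;; rtx is treated as yielding 0 or 1 per lane, and negating that gives
;; the 0 / all-ones mask which the CM<cond> instructions actually write.
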
(define_insn_and_split "aarch64_cm<optab>di"
[(set (match_operand:DI 0 "register_operand" "=w,w,r")
(neg:DI
(COMPARISONS:DI
(match_operand:DI 1 "register_operand" "w,w,r")
(match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
)))
(clobber (reg:CC CC_REGNUM))]
"TARGET_SIMD"
"#"
"&& reload_completed"
[(set (match_operand:DI 0 "register_operand")
(neg:DI
(COMPARISONS:DI
(match_operand:DI 1 "register_operand")
(match_operand:DI 2 "aarch64_simd_reg_or_zero")
)))]
{
/* If we are in the general purpose register file,
we split to a sequence of comparison and store. */
if (GP_REGNUM_P (REGNO (operands[0]))
&& GP_REGNUM_P (REGNO (operands[1])))
{
machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
DONE;
}
/* Otherwise, we expand to a similar pattern which does not
clobber CC_REGNUM. */
}
[(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)
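;; As an illustration (hypothetical register allocation): when the DI
;; comparison above stays in the vector register file it remains a single
;; cmeq d0, d1, d2, while the general-purpose alternative splits after
;; reload into a compare-and-store sequence along the lines of
;;   cmp   x1, x2
;;   csetm x0, eq
;; where csetm writes -1 on a true condition, matching the neg:DI.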
(define_insn "*aarch64_cm<optab>di"
[(set (match_operand:DI 0 "register_operand" "=w,w")
(neg:DI
(COMPARISONS:DI
(match_operand:DI 1 "register_operand" "w,w")
(match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
)))]
"TARGET_SIMD && reload_completed"
"@
cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
cm<optab>\t%d0, %d1, #0"
[(set_attr "type" "neon_compare, neon_compare_zero")]
)
;; cm(hs|hi)
(define_insn "aarch64_cm<optab><mode>"
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
(neg:<V_INT_EQUIV>
(UCOMPARISONS:<V_INT_EQUIV>
(match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "register_operand" "w")
)))]
"TARGET_SIMD"
"cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
[(set_attr "type" "neon_compare<q>")]
)
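;; The <cmp_1>/<cmp_2> attributes swap the operands for comparisons with
;; no direct instruction: a V4SI "ltu", for example, is emitted
;; (hypothetical registers) as cmhi v0.4s, v2.4s, v1.4s, i.e. cmhi with
;; the operands reversed.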
(define_insn_and_split "aarch64_cm<optab>di"
[(set (match_operand:DI 0 "register_operand" "=w,r")
(neg:DI
(UCOMPARISONS:DI
(match_operand:DI 1 "register_operand" "w,r")
(match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
)))
(clobber (reg:CC CC_REGNUM))]
"TARGET_SIMD"
"#"
"&& reload_completed"
[(set (match_operand:DI 0 "register_operand")
(neg:DI
(UCOMPARISONS:DI
(match_operand:DI 1 "register_operand")
(match_operand:DI 2 "aarch64_simd_reg_or_zero")
)))]
{
/* If we are in the general purpose register file,
we split to a sequence of comparison and store. */
if (GP_REGNUM_P (REGNO (operands[0]))
&& GP_REGNUM_P (REGNO (operands[1])))
{
machine_mode mode = CCmode;
rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
DONE;
}
/* Otherwise, we expand to a similar pattern which does not
clobber CC_REGNUM. */
}
[(set_attr "type" "neon_compare,multiple")]
)
(define_insn "*aarch64_cm<optab>di"
[(set (match_operand:DI 0 "register_operand" "=w")
(neg:DI
(UCOMPARISONS:DI
(match_operand:DI 1 "register_operand" "w")
(match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
)))]
"TARGET_SIMD && reload_completed"
"cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
[(set_attr "type" "neon_compare")]
)
;; cmtst
;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
;; we don't have any insns using ne, and aarch64_vcond outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1; the two are equivalent because
;; not z = (neg z) - 1 in two's complement.
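;; As a worked example (made-up lane values): with x = 0x5 and y = 0x4,
;; x & y = 0x4 is nonzero, so cmtst sets that lane to all ones; with
;; y = 0xa instead, x & y = 0 and the lane becomes all zeros.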
(define_insn "aarch64_cmtst<mode>"
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
(plus:<V_INT_EQUIV>
(eq:<V_INT_EQUIV>
(and:VDQ_I
(match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "register_operand" "w"))
(match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
(match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
]
"TARGET_SIMD"
"cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_tst<q>")]
)
;; One can also get a cmtst when combine produces a
;; not (neg (eq x 0)), in which case we rewrite it to
;; a comparison of x against itself.
(define_insn "*aarch64_cmtst_same_<mode>"
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
(plus:<V_INT_EQUIV>
(eq:<V_INT_EQUIV>
(match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "aarch64_simd_imm_zero"))
(match_operand:<V_INT_EQUIV> 3 "aarch64_simd_imm_minus_one")))
]
"TARGET_SIMD"
"cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_tst<q>")]
)
(define_insn_and_split "aarch64_cmtstdi"
[(set (match_operand:DI 0 "register_operand" "=w,r")
(neg:DI
(ne:DI
(and:DI
(match_operand:DI 1 "register_operand" "w,r")
(match_operand:DI 2 "register_operand" "w,r"))
(const_int 0))))
(clobber (reg:CC CC_REGNUM))]
"TARGET_SIMD"
"#"
"&& reload_completed"
[(set (match_operand:DI 0 "register_operand")
(neg:DI
(ne:DI
(and:DI
(match_operand:DI 1 "register_operand")
(match_operand:DI 2 "register_operand"))
(const_int 0))))]
{
/* If we are in the general purpose register file,
we split to a sequence of comparison and store. */
if (GP_REGNUM_P (REGNO (operands[0]))
&& GP_REGNUM_P (REGNO (operands[1])))
{
rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
DONE;
}
/* Otherwise, we expand to a similar pattern which does not
clobber CC_REGNUM. */
}
[(set_attr "type" "neon_tst,multiple")]
)
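;; In the general-purpose case the split above is expected to become a
;; sequence along the lines of (hypothetical registers)
;;   tst   x1, x2
;;   csetm x0, ne
;; since the AND feeding the comparison folds into the flag-setting tst.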
(define_insn "*aarch64_cmtstdi"
[(set (match_operand:DI 0 "register_operand" "=w")
(neg:DI
(ne:DI
(and:DI
(match_operand:DI 1 "register_operand" "w")
(match_operand:DI 2 "register_operand" "w"))
(const_int 0))))]
"TARGET_SIMD"
"cmtst\t%d0, %d1, %d2"
[(set_attr "type" "neon_tst")]
)
;; fcm(eq|ge|gt|le|lt)
(define_insn "aarch64_cm<optab><mode>"
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
(neg:<V_INT_EQUIV>
(COMPARISONS:<V_INT_EQUIV>
(match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
(match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
)))]
"TARGET_SIMD"
"@
fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
[(set_attr "type" "neon_fp_compare_<stype><q>")]
)
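;; For example (hypothetical registers), a V4SF "le" against zero can use
;; the immediate form fcmle v0.4s, v1.4s, 0; le/lt against a register are
;; handled like the integer patterns above, by emitting fcmge/fcmgt with
;; the operands swapped.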
;; fac(ge|gt)
;; Note we can also handle what would be fac(le|lt) by
;; generating fac(ge|gt) with the operands swapped.
(define_insn "aarch64_fac<optab><mode>"
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
(neg:<V_INT_EQUIV>
(FAC_COMPARISONS:<V_INT_EQUIV>
(abs:VHSDF_HSDF
(match_operand:VHSDF_HSDF 1 "register_operand" "w"))
(abs:VHSDF_HSDF
(match_operand:VHSDF_HSDF 2 "register_operand" "w"))
)))]
"TARGET_SIMD"
"fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
[(set_attr "type" "neon_fp_compare_<stype><q>")]
)
;; addp
(define_insn "aarch64_addp<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(unspec:VDQ_I
[(match_operand:VDQ_I 1 "register_operand" "w")
(match_operand:VDQ_I 2 "register_operand" "w")]
UNSPEC_ADDP))]
"TARGET_SIMD"
"addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_reduc_add<q>")]
)
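;; ADDP is a pairwise add: conceptually the two inputs are concatenated
;; and adjacent elements are summed. With made-up values,
;; addp v0.4s, v1.4s, v2.4s yields
;;   v0 = { v1[0]+v1[1], v1[2]+v1[3], v2[0]+v2[1], v2[2]+v2[3] }.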
(define_insn "aarch64_addpdi"
[(set (match_operand:DI 0 "register_operand" "=w")
(unspec:DI
[(match_operand:V2DI 1 "register_operand" "w")]
UNSPEC_ADDP))]
"TARGET_SIMD"
"addp\t%d0, %1.2d"
[(set_attr "type" "neon_reduc_add")]
)
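;; The scalar form reduces a single V2DI instead:
;; addp d0, v1.2d computes d0 = v1.d[0] + v1.d[1].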
;; sqrt
(define_expand "sqrt<mode>2"
[(set (match_operand:VHSDF 0 "register_operand")
(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
"TARGET_SIMD"
{
if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
DONE;
})
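;; Note: aarch64_emit_approx_sqrt substitutes an estimate sequence
;; (frsqrte plus frsqrts Newton-Raphson steps) only when the
;; low-precision/unsafe-math conditions allow it; otherwise it returns
;; false and the plain fsqrt insn below is used.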
(define_insn "*sqrt<mode>2"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
AArch64 [3/10] 2012-10-23 Ian Bolton <ian.bolton@arm.com> James Greenhalgh <james.greenhalgh@arm.com> Jim MacArthur <jim.macarthur@arm.com> Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Marcus Shawcroft <marcus.shawcroft@arm.com> Nigel Stephens <nigel.stephens@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Richard Earnshaw <rearnsha@arm.com> Sofiane Naci <sofiane.naci@arm.com> Stephen Thomas <stephen.thomas@arm.com> Tejas Belagod <tejas.belagod@arm.com> Yufeng Zhang <yufeng.zhang@arm.com> * common/config/aarch64/aarch64-common.c: New file. * config/aarch64/aarch64-arches.def: New file. * config/aarch64/aarch64-builtins.c: New file. * config/aarch64/aarch64-cores.def: New file. * config/aarch64/aarch64-elf-raw.h: New file. * config/aarch64/aarch64-elf.h: New file. * config/aarch64/aarch64-generic.md: New file. * config/aarch64/aarch64-linux.h: New file. * config/aarch64/aarch64-modes.def: New file. * config/aarch64/aarch64-option-extensions.def: New file. * config/aarch64/aarch64-opts.h: New file. * config/aarch64/aarch64-protos.h: New file. * config/aarch64/aarch64-simd.md: New file. * config/aarch64/aarch64-tune.md: New file. * config/aarch64/aarch64.c: New file. * config/aarch64/aarch64.h: New file. * config/aarch64/aarch64.md: New file. * config/aarch64/aarch64.opt: New file. * config/aarch64/arm_neon.h: New file. * config/aarch64/constraints.md: New file. * config/aarch64/gentune.sh: New file. * config/aarch64/iterators.md: New file. * config/aarch64/large.md: New file. * config/aarch64/predicates.md: New file. * config/aarch64/small.md: New file. * config/aarch64/sync.md: New file. * config/aarch64/t-aarch64-linux: New file. * config/aarch64/t-aarch64: New file. Co-Authored-By: Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com> Co-Authored-By: Jim MacArthur <jim.macarthur@arm.com> Co-Authored-By: Marcus Shawcroft <marcus.shawcroft@arm.com> Co-Authored-By: Nigel Stephens <nigel.stephens@arm.com> Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Co-Authored-By: Richard Earnshaw <rearnsha@arm.com> Co-Authored-By: Sofiane Naci <sofiane.naci@arm.com> Co-Authored-By: Stephen Thomas <stephen.thomas@arm.com> Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com> Co-Authored-By: Yufeng Zhang <yufeng.zhang@arm.com> From-SVN: r192723
2012-10-23 19:02:30 +02:00
"TARGET_SIMD"
"fsqrt\\t%0.<Vtype>, %1.<Vtype>"
[AArch64][2/10] ARMv8.2-A FP16 one operand vector intrinsics gcc/ * config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New. * config/aarch64/aarch64-simd-builtins.def: Register new builtins. * config/aarch64/aarch64-simd.md (aarch64_rsqrte<mode>): Extend to HF modes. (neg<mode>2): Likewise. (abs<mode>2): Likewise. (<frint_pattern><mode>2): Likewise. (l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2): Likewise. (<optab><VDQF:mode><fcvt_target>2): Likewise. (<fix_trunc_optab><VDQF:mode><fcvt_target>2): Likewise. (ftrunc<VDQF:mode>2): Likewise. (<optab><fcvt_target><VDQF:mode>2): Likewise. (sqrt<mode>2): Likewise. (*sqrt<mode>2): Likewise. (aarch64_frecpe<mode>): Likewise. (aarch64_cm<optab><mode>): Likewise. * config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return false for V4HF and V8HF. * config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New. (VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes. (stype): New. * config/aarch64/arm_neon.h (vdup_n_f16): New. (vdupq_n_f16): Likewise. (vld1_dup_f16): Use vdup_n_f16. (vld1q_dup_f16): Use vdupq_n_f16. (vabs_f16): New. (vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16, vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16, vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16, vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16, vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16, vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16, vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16, vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16, vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16, vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16, vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16, vsqrtq_f16): Likewise. From-SVN: r238716
2016-07-25 16:20:37 +02:00
[(set_attr "type" "neon_fp_sqrt_<stype><q>")]
AArch64 [3/10] 2012-10-23 Ian Bolton <ian.bolton@arm.com> James Greenhalgh <james.greenhalgh@arm.com> Jim MacArthur <jim.macarthur@arm.com> Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Marcus Shawcroft <marcus.shawcroft@arm.com> Nigel Stephens <nigel.stephens@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Richard Earnshaw <rearnsha@arm.com> Sofiane Naci <sofiane.naci@arm.com> Stephen Thomas <stephen.thomas@arm.com> Tejas Belagod <tejas.belagod@arm.com> Yufeng Zhang <yufeng.zhang@arm.com> * common/config/aarch64/aarch64-common.c: New file. * config/aarch64/aarch64-arches.def: New file. * config/aarch64/aarch64-builtins.c: New file. * config/aarch64/aarch64-cores.def: New file. * config/aarch64/aarch64-elf-raw.h: New file. * config/aarch64/aarch64-elf.h: New file. * config/aarch64/aarch64-generic.md: New file. * config/aarch64/aarch64-linux.h: New file. * config/aarch64/aarch64-modes.def: New file. * config/aarch64/aarch64-option-extensions.def: New file. * config/aarch64/aarch64-opts.h: New file. * config/aarch64/aarch64-protos.h: New file. * config/aarch64/aarch64-simd.md: New file. * config/aarch64/aarch64-tune.md: New file. * config/aarch64/aarch64.c: New file. * config/aarch64/aarch64.h: New file. * config/aarch64/aarch64.md: New file. * config/aarch64/aarch64.opt: New file. * config/aarch64/arm_neon.h: New file. * config/aarch64/constraints.md: New file. * config/aarch64/gentune.sh: New file. * config/aarch64/iterators.md: New file. * config/aarch64/large.md: New file. * config/aarch64/predicates.md: New file. * config/aarch64/small.md: New file. * config/aarch64/sync.md: New file. * config/aarch64/t-aarch64-linux: New file. * config/aarch64/t-aarch64: New file. Co-Authored-By: Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com> Co-Authored-By: Jim MacArthur <jim.macarthur@arm.com> Co-Authored-By: Marcus Shawcroft <marcus.shawcroft@arm.com> Co-Authored-By: Nigel Stephens <nigel.stephens@arm.com> Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Co-Authored-By: Richard Earnshaw <rearnsha@arm.com> Co-Authored-By: Sofiane Naci <sofiane.naci@arm.com> Co-Authored-By: Stephen Thomas <stephen.thomas@arm.com> Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com> Co-Authored-By: Yufeng Zhang <yufeng.zhang@arm.com> From-SVN: r192723
2012-10-23 19:02:30 +02:00
)
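
;; As an illustration (an assumed instantiation, not taken from any
;; particular test), with <mode> = V4SF the template above assembles
;; to
;;
;;   fsqrt   v0.4s, v1.4s
;;
;; and with <mode> = V8HF (which requires TARGET_SIMD_F16INST) to
;; fsqrt v0.8h, v1.8h.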
;; Patterns for vector struct loads and stores.
(define_insn "aarch64_simd_ld2<vstruct_elt>"
[(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
(unspec:VSTRUCT_2Q [
(match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
UNSPEC_LD2))]
AArch64 [3/10] 2012-10-23 Ian Bolton <ian.bolton@arm.com> James Greenhalgh <james.greenhalgh@arm.com> Jim MacArthur <jim.macarthur@arm.com> Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Marcus Shawcroft <marcus.shawcroft@arm.com> Nigel Stephens <nigel.stephens@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Richard Earnshaw <rearnsha@arm.com> Sofiane Naci <sofiane.naci@arm.com> Stephen Thomas <stephen.thomas@arm.com> Tejas Belagod <tejas.belagod@arm.com> Yufeng Zhang <yufeng.zhang@arm.com> * common/config/aarch64/aarch64-common.c: New file. * config/aarch64/aarch64-arches.def: New file. * config/aarch64/aarch64-builtins.c: New file. * config/aarch64/aarch64-cores.def: New file. * config/aarch64/aarch64-elf-raw.h: New file. * config/aarch64/aarch64-elf.h: New file. * config/aarch64/aarch64-generic.md: New file. * config/aarch64/aarch64-linux.h: New file. * config/aarch64/aarch64-modes.def: New file. * config/aarch64/aarch64-option-extensions.def: New file. * config/aarch64/aarch64-opts.h: New file. * config/aarch64/aarch64-protos.h: New file. * config/aarch64/aarch64-simd.md: New file. * config/aarch64/aarch64-tune.md: New file. * config/aarch64/aarch64.c: New file. * config/aarch64/aarch64.h: New file. * config/aarch64/aarch64.md: New file. * config/aarch64/aarch64.opt: New file. * config/aarch64/arm_neon.h: New file. * config/aarch64/constraints.md: New file. * config/aarch64/gentune.sh: New file. * config/aarch64/iterators.md: New file. * config/aarch64/large.md: New file. * config/aarch64/predicates.md: New file. * config/aarch64/small.md: New file. * config/aarch64/sync.md: New file. * config/aarch64/t-aarch64-linux: New file. * config/aarch64/t-aarch64: New file. Co-Authored-By: Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com> Co-Authored-By: Jim MacArthur <jim.macarthur@arm.com> Co-Authored-By: Marcus Shawcroft <marcus.shawcroft@arm.com> Co-Authored-By: Nigel Stephens <nigel.stephens@arm.com> Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Co-Authored-By: Richard Earnshaw <rearnsha@arm.com> Co-Authored-By: Sofiane Naci <sofiane.naci@arm.com> Co-Authored-By: Stephen Thomas <stephen.thomas@arm.com> Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com> Co-Authored-By: Yufeng Zhang <yufeng.zhang@arm.com> From-SVN: r192723
2012-10-23 19:02:30 +02:00
"TARGET_SIMD"
"ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
[(set_attr "type" "neon_load2_2reg<q>")]
)
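
;; A minimal usage sketch (illustrative only; the function name is an
;; assumption): the arm_neon.h intrinsic below maps onto this pattern
;; with <mode> = V2x4SF, loading two interleaved float32x4_t vectors
;; in a single LD2:
;;
;;   #include <arm_neon.h>
;;   float32x4x2_t
;;   load_pair (const float *p)
;;   {
;;     return vld2q_f32 (p);   /* ld2 {v0.4s - v1.4s}, [x0] */
;;   }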
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
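;; Load a single 2-element structure from memory and replicate it to all
;; lanes of a pair of vectors (the LD2R instruction).  A sketch of the
;; intended use, assuming the usual arm_neon.h lowering of vld2_dup_s8
;; through the ld2r builtin (with a hypothetical pointer "ptr"):
;;
;;   int8x8x2_t r = vld2_dup_s8 (ptr);
;;
;; which, per the output template below, is expected to emit:
;;
;;   ld2r	{v0.8b - v1.8b}, [x0]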
(define_insn "aarch64_simd_ld2r<vstruct_elt>"
  [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
	(unspec:VSTRUCT_2QD [
	  (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
	  UNSPEC_LD2_DUP))]
"TARGET_SIMD"
"ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
[(set_attr "type" "neon_load2_all_lanes<q>")]
)
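;; Load one 2-element structure from memory into a single lane of a pair
;; of vectors, leaving the remaining lanes unchanged: operand 2 is the
;; existing vector pair (tied to the output by the "0" constraint) and
;; operand 3 is the lane index.  A sketch of the expected use, assuming
;; the arm_neon.h lowering of vld2_lane_s8 (with hypothetical "ptr" and
;; "val"):
;;
;;   int8x8x2_t r = vld2_lane_s8 (ptr, val, 3);
;;
;; which loads lane 3 of each vector in the pair and is expected to emit
;; something like:
;;
;;   ld2	{v0.b - v1.b}[3], [x0]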
(define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
  [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
	(unspec:VSTRUCT_2QD [
	  (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
	  (match_operand:VSTRUCT_2QD 2 "register_operand" "0")
	  (match_operand:SI 3 "immediate_operand" "i")]
	  UNSPEC_LD2_LANE))]
  "TARGET_SIMD"
{
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
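    /* GCC numbers vector lanes in memory order, which on big-endian
       targets is the reverse of the architectural register-lane order,
       so flip the index before it is printed in the template below.  */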
    operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
					   INTVAL (operands[3]));
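    /* %S0 and %T0 print the first and second registers of the
       destination tuple; %3 is the (possibly endian-flipped) lane.  */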
return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
}
[(set_attr "type" "neon_load2_one_lane")]
)
aarch64: Add machine modes for Neon vector-tuple types Until now, GCC has used large integer machine modes (OI, CI and XI) to model Neon vector-tuple types. This is suboptimal for many reasons, the most notable are: 1) Large integer modes are opaque and modifying one vector in the tuple requires a lot of inefficient set/get gymnastics. The result is a lot of superfluous move instructions. 2) Large integer modes do not map well to types that are tuples of 64-bit vectors - we need additional zero-padding which again results in superfluous move instructions. This patch adds new machine modes that better model the C-level Neon vector-tuple types. The approach is somewhat similar to that already used for SVE vector-tuple types. All of the AArch64 backend patterns and builtins that manipulate Neon vector tuples are updated to use the new machine modes. This has the effect of significantly reducing the amount of boiler-plate code in the arm_neon.h header. While this patch increases the quality of code generated in many instances, there is still room for significant improvement - which will be attempted in subsequent patches. gcc/ChangeLog: 2021-08-09 Jonathan Wright <jonathan.wright@arm.com> Richard Sandiford <richard.sandiford@arm.com> * config/aarch64/aarch64-builtins.c (v2x8qi_UP): Define. (v2x4hi_UP): Likewise. (v2x4hf_UP): Likewise. (v2x4bf_UP): Likewise. (v2x2si_UP): Likewise. (v2x2sf_UP): Likewise. (v2x1di_UP): Likewise. (v2x1df_UP): Likewise. (v2x16qi_UP): Likewise. (v2x8hi_UP): Likewise. (v2x8hf_UP): Likewise. (v2x8bf_UP): Likewise. (v2x4si_UP): Likewise. (v2x4sf_UP): Likewise. (v2x2di_UP): Likewise. (v2x2df_UP): Likewise. (v3x8qi_UP): Likewise. (v3x4hi_UP): Likewise. (v3x4hf_UP): Likewise. (v3x4bf_UP): Likewise. (v3x2si_UP): Likewise. (v3x2sf_UP): Likewise. (v3x1di_UP): Likewise. (v3x1df_UP): Likewise. (v3x16qi_UP): Likewise. (v3x8hi_UP): Likewise. (v3x8hf_UP): Likewise. (v3x8bf_UP): Likewise. (v3x4si_UP): Likewise. (v3x4sf_UP): Likewise. (v3x2di_UP): Likewise. (v3x2df_UP): Likewise. (v4x8qi_UP): Likewise. (v4x4hi_UP): Likewise. (v4x4hf_UP): Likewise. (v4x4bf_UP): Likewise. (v4x2si_UP): Likewise. (v4x2sf_UP): Likewise. (v4x1di_UP): Likewise. (v4x1df_UP): Likewise. (v4x16qi_UP): Likewise. (v4x8hi_UP): Likewise. (v4x8hf_UP): Likewise. (v4x8bf_UP): Likewise. (v4x4si_UP): Likewise. (v4x4sf_UP): Likewise. (v4x2di_UP): Likewise. (v4x2df_UP): Likewise. (TYPES_GETREGP): Delete. (TYPES_SETREGP): Likewise. (TYPES_LOADSTRUCT_U): Define. (TYPES_LOADSTRUCT_P): Likewise. (TYPES_LOADSTRUCT_LANE_U): Likewise. (TYPES_LOADSTRUCT_LANE_P): Likewise. (TYPES_STORE1P): Move for consistency. (TYPES_STORESTRUCT_U): Define. (TYPES_STORESTRUCT_P): Likewise. (TYPES_STORESTRUCT_LANE_U): Likewise. (TYPES_STORESTRUCT_LANE_P): Likewise. (aarch64_simd_tuple_types): Define. (aarch64_lookup_simd_builtin_type): Handle tuple type lookup. (aarch64_init_simd_builtin_functions): Update frontend lookup for builtin functions after handling arm_neon.h pragma. (register_tuple_type): Manually set modes of single-integer tuple types. Record tuple types. * config/aarch64/aarch64-modes.def (ADV_SIMD_D_REG_STRUCT_MODES): Define D-register tuple modes. (ADV_SIMD_Q_REG_STRUCT_MODES): Define Q-register tuple modes. (SVE_MODES): Give single-vector modes priority over vector- tuple modes. (VECTOR_MODES_WITH_PREFIX): Set partial-vector mode order to be after all single-vector modes. * config/aarch64/aarch64-simd-builtins.def: Update builtin generator macros to reflect modifications to the backend patterns. 
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2<vstruct_elt>): This. (aarch64_simd_ld2r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2r<vstruct_elt>): This. (aarch64_vec_load_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_load_lanes<mode>_lane<vstruct_elt>): This. (vec_load_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanes<mode><vstruct_elt>): This. (aarch64_simd_st2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st2<vstruct_elt>): This. (aarch64_vec_store_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_store_lanes<mode>_lane<vstruct_elt>): This. (vec_store_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanes<mode><vstruct_elt>): This. (aarch64_simd_ld3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3<vstruct_elt>): This. (aarch64_simd_ld3r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3r<vstruct_elt>): This. (aarch64_vec_load_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesci<mode>): This. (aarch64_simd_st3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st3<vstruct_elt>): This. (aarch64_vec_store_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesci<mode>): This. (aarch64_simd_ld4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4<vstruct_elt>): This. (aarch64_simd_ld4r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4r<vstruct_elt>): This. (aarch64_vec_load_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesxi<mode>): This. (aarch64_simd_st4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st4<vstruct_elt>): This. (aarch64_vec_store_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesxi<mode>): This. (mov<mode>): Define for Neon vector-tuple modes. (aarch64_ld1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x3<vstruct_elt>): This. (aarch64_ld1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x3_<vstruct_elt>): This. (aarch64_ld1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x4<vstruct_elt>): This. (aarch64_ld1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x4_<vstruct_elt>): This. (aarch64_st1x2<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x2<vstruct_elt>): This. (aarch64_st1_x2_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x2_<vstruct_elt>): This. (aarch64_st1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x3<vstruct_elt>): This. (aarch64_st1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x3_<vstruct_elt>): This. (aarch64_st1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x4<vstruct_elt>): This. (aarch64_st1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x4_<vstruct_elt>): This. (*aarch64_mov<mode>): Define for vector-tuple modes. (*aarch64_be_mov<mode>): Likewise. (aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs>r<vstruct_elt>): This. (aarch64_ld2<mode>_dreg): Use vector-tuple mode iterator and rename to... 
(aarch64_ld2<vstruct_elt>_dreg): This. (aarch64_ld3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld3<vstruct_elt>_dreg): This. (aarch64_ld4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld4<vstruct_elt>_dreg): This. (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs><vstruct_elt>): Use vector-tuple mode iterator and rename to... (aarch64_ld<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode (aarch64_ld1x2<VQ:mode>): Delete. (aarch64_ld1x2<VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x2<vstruct_elt>): This. (aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_ld<nregs>_lane<vstruct_elt>): This. (aarch64_get_dreg<VSTRUCT:mode><VDC:mode>): Delete. (aarch64_get_qreg<VSTRUCT:mode><VQ:mode>): Likewise. (aarch64_st2<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st2<vstruct_elt>_dreg): This. (aarch64_st3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st3<vstruct_elt>_dreg): This. (aarch64_st4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st4<vstruct_elt>_dreg): This. (aarch64_st<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st<nregs><vstruct_elt>): This. (aarch64_st<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode iterator and rename to aarch64_st<nregs><vstruct_elt>. (aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_st<nregs>_lane<vstruct_elt>): This. (aarch64_set_qreg<VSTRUCT:mode><VQ:mode>): Delete. (aarch64_simd_ld1<mode>_x2): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld1<vstruct_elt>_x2): This. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Refactor to include new vector-tuple modes. (aarch64_classify_vector_mode): Add cases for new vector- tuple modes. (aarch64_advsimd_partial_struct_mode_p): Define. (aarch64_advsimd_full_struct_mode_p): Likewise. (aarch64_advsimd_vector_array_mode): Likewise. (aarch64_sve_data_mode): Change location in file. (aarch64_array_mode): Handle case of Neon vector-tuple modes. (aarch64_hard_regno_nregs): Handle case of partial Neon vector structures. (aarch64_classify_address): Refactor to include handling of Neon vector-tuple modes. (aarch64_print_operand): Print "d" for "%R" for a partial Neon vector structure. (aarch64_expand_vec_perm_1): Use new vector-tuple mode. (aarch64_modes_tieable_p): Prevent tieing Neon partial struct modes with scalar machines modes larger than 8 bytes. (aarch64_can_change_mode_class): Don't allow changes between partial and full Neon vector-structure modes. * config/aarch64/arm_neon.h (vst2_lane_f16): Use updated builtin and remove boiler-plate code for opaque mode. (vst2_lane_f32): Likewise. (vst2_lane_f64): Likewise. (vst2_lane_p8): Likewise. (vst2_lane_p16): Likewise. (vst2_lane_p64): Likewise. (vst2_lane_s8): Likewise. (vst2_lane_s16): Likewise. (vst2_lane_s32): Likewise. (vst2_lane_s64): Likewise. (vst2_lane_u8): Likewise. (vst2_lane_u16): Likewise. (vst2_lane_u32): Likewise. (vst2_lane_u64): Likewise. (vst2q_lane_f16): Likewise. (vst2q_lane_f32): Likewise. (vst2q_lane_f64): Likewise. (vst2q_lane_p8): Likewise. (vst2q_lane_p16): Likewise. (vst2q_lane_p64): Likewise. (vst2q_lane_s8): Likewise. (vst2q_lane_s16): Likewise. (vst2q_lane_s32): Likewise. (vst2q_lane_s64): Likewise. (vst2q_lane_u8): Likewise. (vst2q_lane_u16): Likewise. (vst2q_lane_u32): Likewise. 
(vst2q_lane_u64): Likewise. (vst3_lane_f16): Likewise. (vst3_lane_f32): Likewise. (vst3_lane_f64): Likewise. (vst3_lane_p8): Likewise. (vst3_lane_p16): Likewise. (vst3_lane_p64): Likewise. (vst3_lane_s8): Likewise. (vst3_lane_s16): Likewise. (vst3_lane_s32): Likewise. (vst3_lane_s64): Likewise. (vst3_lane_u8): Likewise. (vst3_lane_u16): Likewise. (vst3_lane_u32): Likewise. (vst3_lane_u64): Likewise. (vst3q_lane_f16): Likewise. (vst3q_lane_f32): Likewise. (vst3q_lane_f64): Likewise. (vst3q_lane_p8): Likewise. (vst3q_lane_p16): Likewise. (vst3q_lane_p64): Likewise. (vst3q_lane_s8): Likewise. (vst3q_lane_s16): Likewise. (vst3q_lane_s32): Likewise. (vst3q_lane_s64): Likewise. (vst3q_lane_u8): Likewise. (vst3q_lane_u16): Likewise. (vst3q_lane_u32): Likewise. (vst3q_lane_u64): Likewise. (vst4_lane_f16): Likewise. (vst4_lane_f32): Likewise. (vst4_lane_f64): Likewise. (vst4_lane_p8): Likewise. (vst4_lane_p16): Likewise. (vst4_lane_p64): Likewise. (vst4_lane_s8): Likewise. (vst4_lane_s16): Likewise. (vst4_lane_s32): Likewise. (vst4_lane_s64): Likewise. (vst4_lane_u8): Likewise. (vst4_lane_u16): Likewise. (vst4_lane_u32): Likewise. (vst4_lane_u64): Likewise. (vst4q_lane_f16): Likewise. (vst4q_lane_f32): Likewise. (vst4q_lane_f64): Likewise. (vst4q_lane_p8): Likewise. (vst4q_lane_p16): Likewise. (vst4q_lane_p64): Likewise. (vst4q_lane_s8): Likewise. (vst4q_lane_s16): Likewise. (vst4q_lane_s32): Likewise. (vst4q_lane_s64): Likewise. (vst4q_lane_u8): Likewise. (vst4q_lane_u16): Likewise. (vst4q_lane_u32): Likewise. (vst4q_lane_u64): Likewise. (vtbl3_s8): Likewise. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vld1_u8_x3): Likewise. (vld1_s8_x3): Likewise. (vld1_u16_x3): Likewise. (vld1_s16_x3): Likewise. (vld1_u32_x3): Likewise. (vld1_s32_x3): Likewise. (vld1_u64_x3): Likewise. (vld1_s64_x3): Likewise. (vld1_f16_x3): Likewise. (vld1_f32_x3): Likewise. (vld1_f64_x3): Likewise. (vld1_p8_x3): Likewise. (vld1_p16_x3): Likewise. (vld1_p64_x3): Likewise. (vld1q_u8_x3): Likewise. (vld1q_s8_x3): Likewise. (vld1q_u16_x3): Likewise. (vld1q_s16_x3): Likewise. (vld1q_u32_x3): Likewise. (vld1q_s32_x3): Likewise. (vld1q_u64_x3): Likewise. (vld1q_s64_x3): Likewise. (vld1q_f16_x3): Likewise. (vld1q_f32_x3): Likewise. (vld1q_f64_x3): Likewise. (vld1q_p8_x3): Likewise. (vld1q_p16_x3): Likewise. (vld1q_p64_x3): Likewise. (vld1_u8_x2): Likewise. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. (vld1_s8_x4): Likewise. (vld1q_s8_x4): Likewise. (vld1_s16_x4): Likewise. (vld1q_s16_x4): Likewise. (vld1_s32_x4): Likewise. (vld1q_s32_x4): Likewise. (vld1_u8_x4): Likewise. (vld1q_u8_x4): Likewise. (vld1_u16_x4): Likewise. (vld1q_u16_x4): Likewise. (vld1_u32_x4): Likewise. (vld1q_u32_x4): Likewise. (vld1_f16_x4): Likewise. (vld1q_f16_x4): Likewise. (vld1_f32_x4): Likewise. (vld1q_f32_x4): Likewise. (vld1_p8_x4): Likewise. 
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
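;; The vectorizer's vec_load_lanes optab for a tuple of two Q-register
;; vectors: load two interleaved vectors from memory with a single LD2.
;; On big-endian targets the lane order produced by LD2 does not match the
;; order the middle end expects, so the result is first loaded into a
;; temporary and each vector of the tuple is then permuted back into the
;; expected element order using a mask from aarch64_reverse_mask.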
(define_expand "vec_load_lanes<mode><vstruct_elt>"
[(set (match_operand:VSTRUCT_2Q 0 "register_operand")
(unspec:VSTRUCT_2Q [
(match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand")]
UNSPEC_LD2))]
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
{
      rtx tmp = gen_reg_rtx (<MODE>mode);
      rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
			GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
      /* Load into a temporary, then restore the expected lane order by
	 reversing the elements within each vector of the tuple.  */
      emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
    }
  else
emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[0], operands[1]));
DONE;
})
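
;; Store a pair of Q registers to memory with two-way interleaving (ST2).
;; Operand 1 is a VSTRUCT_2Q register tuple; %S1 and %T1 in the template
;; below select the first and second registers of that tuple.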
(define_insn "aarch64_simd_st2<vstruct_elt>"
[(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:VSTRUCT_2Q [
(match_operand:VSTRUCT_2Q 1 "register_operand" "w")]
UNSPEC_ST2))]
"TARGET_SIMD"
"st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
[(set_attr "type" "neon_store2_2reg<q>")]
)
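
;; A minimal sketch of user code that should reach this pattern through the
;; arm_neon.h intrinsics (the function name here is illustrative, not from
;; this file):
;;
;;   #include <arm_neon.h>
;;   void store_pair (int32_t *p, int32x4x2_t v) { vst2q_s32 (p, v); }
;;
;; which is expected to assemble to something like:
;;   st2 {v0.4s - v1.4s}, [x0]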
;; RTL uses GCC vector extension indices, so flip only for assembly.
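;; The memory operand below uses BLK mode since a per-lane structure store
;; (one element from each vector of the tuple) has no machine mode of its
;; own; the access size is determined by the element mode instead.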
aarch64: Add machine modes for Neon vector-tuple types Until now, GCC has used large integer machine modes (OI, CI and XI) to model Neon vector-tuple types. This is suboptimal for many reasons, the most notable are: 1) Large integer modes are opaque and modifying one vector in the tuple requires a lot of inefficient set/get gymnastics. The result is a lot of superfluous move instructions. 2) Large integer modes do not map well to types that are tuples of 64-bit vectors - we need additional zero-padding which again results in superfluous move instructions. This patch adds new machine modes that better model the C-level Neon vector-tuple types. The approach is somewhat similar to that already used for SVE vector-tuple types. All of the AArch64 backend patterns and builtins that manipulate Neon vector tuples are updated to use the new machine modes. This has the effect of significantly reducing the amount of boiler-plate code in the arm_neon.h header. While this patch increases the quality of code generated in many instances, there is still room for significant improvement - which will be attempted in subsequent patches. gcc/ChangeLog: 2021-08-09 Jonathan Wright <jonathan.wright@arm.com> Richard Sandiford <richard.sandiford@arm.com> * config/aarch64/aarch64-builtins.c (v2x8qi_UP): Define. (v2x4hi_UP): Likewise. (v2x4hf_UP): Likewise. (v2x4bf_UP): Likewise. (v2x2si_UP): Likewise. (v2x2sf_UP): Likewise. (v2x1di_UP): Likewise. (v2x1df_UP): Likewise. (v2x16qi_UP): Likewise. (v2x8hi_UP): Likewise. (v2x8hf_UP): Likewise. (v2x8bf_UP): Likewise. (v2x4si_UP): Likewise. (v2x4sf_UP): Likewise. (v2x2di_UP): Likewise. (v2x2df_UP): Likewise. (v3x8qi_UP): Likewise. (v3x4hi_UP): Likewise. (v3x4hf_UP): Likewise. (v3x4bf_UP): Likewise. (v3x2si_UP): Likewise. (v3x2sf_UP): Likewise. (v3x1di_UP): Likewise. (v3x1df_UP): Likewise. (v3x16qi_UP): Likewise. (v3x8hi_UP): Likewise. (v3x8hf_UP): Likewise. (v3x8bf_UP): Likewise. (v3x4si_UP): Likewise. (v3x4sf_UP): Likewise. (v3x2di_UP): Likewise. (v3x2df_UP): Likewise. (v4x8qi_UP): Likewise. (v4x4hi_UP): Likewise. (v4x4hf_UP): Likewise. (v4x4bf_UP): Likewise. (v4x2si_UP): Likewise. (v4x2sf_UP): Likewise. (v4x1di_UP): Likewise. (v4x1df_UP): Likewise. (v4x16qi_UP): Likewise. (v4x8hi_UP): Likewise. (v4x8hf_UP): Likewise. (v4x8bf_UP): Likewise. (v4x4si_UP): Likewise. (v4x4sf_UP): Likewise. (v4x2di_UP): Likewise. (v4x2df_UP): Likewise. (TYPES_GETREGP): Delete. (TYPES_SETREGP): Likewise. (TYPES_LOADSTRUCT_U): Define. (TYPES_LOADSTRUCT_P): Likewise. (TYPES_LOADSTRUCT_LANE_U): Likewise. (TYPES_LOADSTRUCT_LANE_P): Likewise. (TYPES_STORE1P): Move for consistency. (TYPES_STORESTRUCT_U): Define. (TYPES_STORESTRUCT_P): Likewise. (TYPES_STORESTRUCT_LANE_U): Likewise. (TYPES_STORESTRUCT_LANE_P): Likewise. (aarch64_simd_tuple_types): Define. (aarch64_lookup_simd_builtin_type): Handle tuple type lookup. (aarch64_init_simd_builtin_functions): Update frontend lookup for builtin functions after handling arm_neon.h pragma. (register_tuple_type): Manually set modes of single-integer tuple types. Record tuple types. * config/aarch64/aarch64-modes.def (ADV_SIMD_D_REG_STRUCT_MODES): Define D-register tuple modes. (ADV_SIMD_Q_REG_STRUCT_MODES): Define Q-register tuple modes. (SVE_MODES): Give single-vector modes priority over vector- tuple modes. (VECTOR_MODES_WITH_PREFIX): Set partial-vector mode order to be after all single-vector modes. * config/aarch64/aarch64-simd-builtins.def: Update builtin generator macros to reflect modifications to the backend patterns. 
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2<vstruct_elt>): This. (aarch64_simd_ld2r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2r<vstruct_elt>): This. (aarch64_vec_load_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_load_lanes<mode>_lane<vstruct_elt>): This. (vec_load_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanes<mode><vstruct_elt>): This. (aarch64_simd_st2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st2<vstruct_elt>): This. (aarch64_vec_store_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_store_lanes<mode>_lane<vstruct_elt>): This. (vec_store_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanes<mode><vstruct_elt>): This. (aarch64_simd_ld3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3<vstruct_elt>): This. (aarch64_simd_ld3r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3r<vstruct_elt>): This. (aarch64_vec_load_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesci<mode>): This. (aarch64_simd_st3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st3<vstruct_elt>): This. (aarch64_vec_store_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesci<mode>): This. (aarch64_simd_ld4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4<vstruct_elt>): This. (aarch64_simd_ld4r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4r<vstruct_elt>): This. (aarch64_vec_load_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesxi<mode>): This. (aarch64_simd_st4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st4<vstruct_elt>): This. (aarch64_vec_store_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesxi<mode>): This. (mov<mode>): Define for Neon vector-tuple modes. (aarch64_ld1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x3<vstruct_elt>): This. (aarch64_ld1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x3_<vstruct_elt>): This. (aarch64_ld1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x4<vstruct_elt>): This. (aarch64_ld1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x4_<vstruct_elt>): This. (aarch64_st1x2<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x2<vstruct_elt>): This. (aarch64_st1_x2_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x2_<vstruct_elt>): This. (aarch64_st1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x3<vstruct_elt>): This. (aarch64_st1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x3_<vstruct_elt>): This. (aarch64_st1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x4<vstruct_elt>): This. (aarch64_st1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x4_<vstruct_elt>): This. (*aarch64_mov<mode>): Define for vector-tuple modes. (*aarch64_be_mov<mode>): Likewise. (aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs>r<vstruct_elt>): This. (aarch64_ld2<mode>_dreg): Use vector-tuple mode iterator and rename to... 
(aarch64_ld2<vstruct_elt>_dreg): This. (aarch64_ld3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld3<vstruct_elt>_dreg): This. (aarch64_ld4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld4<vstruct_elt>_dreg): This. (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs><vstruct_elt>): Use vector-tuple mode iterator and rename to... (aarch64_ld<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode (aarch64_ld1x2<VQ:mode>): Delete. (aarch64_ld1x2<VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x2<vstruct_elt>): This. (aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_ld<nregs>_lane<vstruct_elt>): This. (aarch64_get_dreg<VSTRUCT:mode><VDC:mode>): Delete. (aarch64_get_qreg<VSTRUCT:mode><VQ:mode>): Likewise. (aarch64_st2<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st2<vstruct_elt>_dreg): This. (aarch64_st3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st3<vstruct_elt>_dreg): This. (aarch64_st4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st4<vstruct_elt>_dreg): This. (aarch64_st<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st<nregs><vstruct_elt>): This. (aarch64_st<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode iterator and rename to aarch64_st<nregs><vstruct_elt>. (aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_st<nregs>_lane<vstruct_elt>): This. (aarch64_set_qreg<VSTRUCT:mode><VQ:mode>): Delete. (aarch64_simd_ld1<mode>_x2): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld1<vstruct_elt>_x2): This. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Refactor to include new vector-tuple modes. (aarch64_classify_vector_mode): Add cases for new vector- tuple modes. (aarch64_advsimd_partial_struct_mode_p): Define. (aarch64_advsimd_full_struct_mode_p): Likewise. (aarch64_advsimd_vector_array_mode): Likewise. (aarch64_sve_data_mode): Change location in file. (aarch64_array_mode): Handle case of Neon vector-tuple modes. (aarch64_hard_regno_nregs): Handle case of partial Neon vector structures. (aarch64_classify_address): Refactor to include handling of Neon vector-tuple modes. (aarch64_print_operand): Print "d" for "%R" for a partial Neon vector structure. (aarch64_expand_vec_perm_1): Use new vector-tuple mode. (aarch64_modes_tieable_p): Prevent tieing Neon partial struct modes with scalar machines modes larger than 8 bytes. (aarch64_can_change_mode_class): Don't allow changes between partial and full Neon vector-structure modes. * config/aarch64/arm_neon.h (vst2_lane_f16): Use updated builtin and remove boiler-plate code for opaque mode. (vst2_lane_f32): Likewise. (vst2_lane_f64): Likewise. (vst2_lane_p8): Likewise. (vst2_lane_p16): Likewise. (vst2_lane_p64): Likewise. (vst2_lane_s8): Likewise. (vst2_lane_s16): Likewise. (vst2_lane_s32): Likewise. (vst2_lane_s64): Likewise. (vst2_lane_u8): Likewise. (vst2_lane_u16): Likewise. (vst2_lane_u32): Likewise. (vst2_lane_u64): Likewise. (vst2q_lane_f16): Likewise. (vst2q_lane_f32): Likewise. (vst2q_lane_f64): Likewise. (vst2q_lane_p8): Likewise. (vst2q_lane_p16): Likewise. (vst2q_lane_p64): Likewise. (vst2q_lane_s8): Likewise. (vst2q_lane_s16): Likewise. (vst2q_lane_s32): Likewise. (vst2q_lane_s64): Likewise. (vst2q_lane_u8): Likewise. (vst2q_lane_u16): Likewise. (vst2q_lane_u32): Likewise. 
(vst2q_lane_u64): Likewise. (vst3_lane_f16): Likewise. (vst3_lane_f32): Likewise. (vst3_lane_f64): Likewise. (vst3_lane_p8): Likewise. (vst3_lane_p16): Likewise. (vst3_lane_p64): Likewise. (vst3_lane_s8): Likewise. (vst3_lane_s16): Likewise. (vst3_lane_s32): Likewise. (vst3_lane_s64): Likewise. (vst3_lane_u8): Likewise. (vst3_lane_u16): Likewise. (vst3_lane_u32): Likewise. (vst3_lane_u64): Likewise. (vst3q_lane_f16): Likewise. (vst3q_lane_f32): Likewise. (vst3q_lane_f64): Likewise. (vst3q_lane_p8): Likewise. (vst3q_lane_p16): Likewise. (vst3q_lane_p64): Likewise. (vst3q_lane_s8): Likewise. (vst3q_lane_s16): Likewise. (vst3q_lane_s32): Likewise. (vst3q_lane_s64): Likewise. (vst3q_lane_u8): Likewise. (vst3q_lane_u16): Likewise. (vst3q_lane_u32): Likewise. (vst3q_lane_u64): Likewise. (vst4_lane_f16): Likewise. (vst4_lane_f32): Likewise. (vst4_lane_f64): Likewise. (vst4_lane_p8): Likewise. (vst4_lane_p16): Likewise. (vst4_lane_p64): Likewise. (vst4_lane_s8): Likewise. (vst4_lane_s16): Likewise. (vst4_lane_s32): Likewise. (vst4_lane_s64): Likewise. (vst4_lane_u8): Likewise. (vst4_lane_u16): Likewise. (vst4_lane_u32): Likewise. (vst4_lane_u64): Likewise. (vst4q_lane_f16): Likewise. (vst4q_lane_f32): Likewise. (vst4q_lane_f64): Likewise. (vst4q_lane_p8): Likewise. (vst4q_lane_p16): Likewise. (vst4q_lane_p64): Likewise. (vst4q_lane_s8): Likewise. (vst4q_lane_s16): Likewise. (vst4q_lane_s32): Likewise. (vst4q_lane_s64): Likewise. (vst4q_lane_u8): Likewise. (vst4q_lane_u16): Likewise. (vst4q_lane_u32): Likewise. (vst4q_lane_u64): Likewise. (vtbl3_s8): Likewise. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vld1_u8_x3): Likewise. (vld1_s8_x3): Likewise. (vld1_u16_x3): Likewise. (vld1_s16_x3): Likewise. (vld1_u32_x3): Likewise. (vld1_s32_x3): Likewise. (vld1_u64_x3): Likewise. (vld1_s64_x3): Likewise. (vld1_f16_x3): Likewise. (vld1_f32_x3): Likewise. (vld1_f64_x3): Likewise. (vld1_p8_x3): Likewise. (vld1_p16_x3): Likewise. (vld1_p64_x3): Likewise. (vld1q_u8_x3): Likewise. (vld1q_s8_x3): Likewise. (vld1q_u16_x3): Likewise. (vld1q_s16_x3): Likewise. (vld1q_u32_x3): Likewise. (vld1q_s32_x3): Likewise. (vld1q_u64_x3): Likewise. (vld1q_s64_x3): Likewise. (vld1q_f16_x3): Likewise. (vld1q_f32_x3): Likewise. (vld1q_f64_x3): Likewise. (vld1q_p8_x3): Likewise. (vld1q_p16_x3): Likewise. (vld1q_p64_x3): Likewise. (vld1_u8_x2): Likewise. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. (vld1_s8_x4): Likewise. (vld1q_s8_x4): Likewise. (vld1_s16_x4): Likewise. (vld1q_s16_x4): Likewise. (vld1_s32_x4): Likewise. (vld1q_s32_x4): Likewise. (vld1_u8_x4): Likewise. (vld1q_u8_x4): Likewise. (vld1_u16_x4): Likewise. (vld1q_u16_x4): Likewise. (vld1_u32_x4): Likewise. (vld1q_u32_x4): Likewise. (vld1_f16_x4): Likewise. (vld1q_f16_x4): Likewise. (vld1_f32_x4): Likewise. (vld1q_f32_x4): Likewise. (vld1_p8_x4): Likewise. 
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
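;; Store one lane from each vector of a two-vector tuple to memory as a
;; single interleaved two-element structure, using the ST2 (single
;; structure) instruction.  Operand 1 holds the two vectors in consecutive
;; registers (%S1 and %T1 below); operand 2 is the lane index, which the
;; output template adjusts for endianness.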
(define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")
                     (match_operand:SI 2 "immediate_operand" "i")]
                    UNSPEC_ST2_LANE))]
  "TARGET_SIMD"
  {
aarch64: Add machine modes for Neon vector-tuple types Until now, GCC has used large integer machine modes (OI, CI and XI) to model Neon vector-tuple types. This is suboptimal for many reasons, the most notable are: 1) Large integer modes are opaque and modifying one vector in the tuple requires a lot of inefficient set/get gymnastics. The result is a lot of superfluous move instructions. 2) Large integer modes do not map well to types that are tuples of 64-bit vectors - we need additional zero-padding which again results in superfluous move instructions. This patch adds new machine modes that better model the C-level Neon vector-tuple types. The approach is somewhat similar to that already used for SVE vector-tuple types. All of the AArch64 backend patterns and builtins that manipulate Neon vector tuples are updated to use the new machine modes. This has the effect of significantly reducing the amount of boiler-plate code in the arm_neon.h header. While this patch increases the quality of code generated in many instances, there is still room for significant improvement - which will be attempted in subsequent patches. gcc/ChangeLog: 2021-08-09 Jonathan Wright <jonathan.wright@arm.com> Richard Sandiford <richard.sandiford@arm.com> * config/aarch64/aarch64-builtins.c (v2x8qi_UP): Define. (v2x4hi_UP): Likewise. (v2x4hf_UP): Likewise. (v2x4bf_UP): Likewise. (v2x2si_UP): Likewise. (v2x2sf_UP): Likewise. (v2x1di_UP): Likewise. (v2x1df_UP): Likewise. (v2x16qi_UP): Likewise. (v2x8hi_UP): Likewise. (v2x8hf_UP): Likewise. (v2x8bf_UP): Likewise. (v2x4si_UP): Likewise. (v2x4sf_UP): Likewise. (v2x2di_UP): Likewise. (v2x2df_UP): Likewise. (v3x8qi_UP): Likewise. (v3x4hi_UP): Likewise. (v3x4hf_UP): Likewise. (v3x4bf_UP): Likewise. (v3x2si_UP): Likewise. (v3x2sf_UP): Likewise. (v3x1di_UP): Likewise. (v3x1df_UP): Likewise. (v3x16qi_UP): Likewise. (v3x8hi_UP): Likewise. (v3x8hf_UP): Likewise. (v3x8bf_UP): Likewise. (v3x4si_UP): Likewise. (v3x4sf_UP): Likewise. (v3x2di_UP): Likewise. (v3x2df_UP): Likewise. (v4x8qi_UP): Likewise. (v4x4hi_UP): Likewise. (v4x4hf_UP): Likewise. (v4x4bf_UP): Likewise. (v4x2si_UP): Likewise. (v4x2sf_UP): Likewise. (v4x1di_UP): Likewise. (v4x1df_UP): Likewise. (v4x16qi_UP): Likewise. (v4x8hi_UP): Likewise. (v4x8hf_UP): Likewise. (v4x8bf_UP): Likewise. (v4x4si_UP): Likewise. (v4x4sf_UP): Likewise. (v4x2di_UP): Likewise. (v4x2df_UP): Likewise. (TYPES_GETREGP): Delete. (TYPES_SETREGP): Likewise. (TYPES_LOADSTRUCT_U): Define. (TYPES_LOADSTRUCT_P): Likewise. (TYPES_LOADSTRUCT_LANE_U): Likewise. (TYPES_LOADSTRUCT_LANE_P): Likewise. (TYPES_STORE1P): Move for consistency. (TYPES_STORESTRUCT_U): Define. (TYPES_STORESTRUCT_P): Likewise. (TYPES_STORESTRUCT_LANE_U): Likewise. (TYPES_STORESTRUCT_LANE_P): Likewise. (aarch64_simd_tuple_types): Define. (aarch64_lookup_simd_builtin_type): Handle tuple type lookup. (aarch64_init_simd_builtin_functions): Update frontend lookup for builtin functions after handling arm_neon.h pragma. (register_tuple_type): Manually set modes of single-integer tuple types. Record tuple types. * config/aarch64/aarch64-modes.def (ADV_SIMD_D_REG_STRUCT_MODES): Define D-register tuple modes. (ADV_SIMD_Q_REG_STRUCT_MODES): Define Q-register tuple modes. (SVE_MODES): Give single-vector modes priority over vector- tuple modes. (VECTOR_MODES_WITH_PREFIX): Set partial-vector mode order to be after all single-vector modes. * config/aarch64/aarch64-simd-builtins.def: Update builtin generator macros to reflect modifications to the backend patterns. 
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2<vstruct_elt>): This. (aarch64_simd_ld2r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2r<vstruct_elt>): This. (aarch64_vec_load_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_load_lanes<mode>_lane<vstruct_elt>): This. (vec_load_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanes<mode><vstruct_elt>): This. (aarch64_simd_st2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st2<vstruct_elt>): This. (aarch64_vec_store_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_store_lanes<mode>_lane<vstruct_elt>): This. (vec_store_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanes<mode><vstruct_elt>): This. (aarch64_simd_ld3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3<vstruct_elt>): This. (aarch64_simd_ld3r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3r<vstruct_elt>): This. (aarch64_vec_load_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesci<mode>): This. (aarch64_simd_st3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st3<vstruct_elt>): This. (aarch64_vec_store_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesci<mode>): This. (aarch64_simd_ld4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4<vstruct_elt>): This. (aarch64_simd_ld4r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4r<vstruct_elt>): This. (aarch64_vec_load_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesxi<mode>): This. (aarch64_simd_st4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st4<vstruct_elt>): This. (aarch64_vec_store_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesxi<mode>): This. (mov<mode>): Define for Neon vector-tuple modes. (aarch64_ld1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x3<vstruct_elt>): This. (aarch64_ld1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x3_<vstruct_elt>): This. (aarch64_ld1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x4<vstruct_elt>): This. (aarch64_ld1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x4_<vstruct_elt>): This. (aarch64_st1x2<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x2<vstruct_elt>): This. (aarch64_st1_x2_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x2_<vstruct_elt>): This. (aarch64_st1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x3<vstruct_elt>): This. (aarch64_st1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x3_<vstruct_elt>): This. (aarch64_st1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x4<vstruct_elt>): This. (aarch64_st1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x4_<vstruct_elt>): This. (*aarch64_mov<mode>): Define for vector-tuple modes. (*aarch64_be_mov<mode>): Likewise. (aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs>r<vstruct_elt>): This. (aarch64_ld2<mode>_dreg): Use vector-tuple mode iterator and rename to... 
(aarch64_ld2<vstruct_elt>_dreg): This. (aarch64_ld3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld3<vstruct_elt>_dreg): This. (aarch64_ld4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld4<vstruct_elt>_dreg): This. (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs><vstruct_elt>): Use vector-tuple mode iterator and rename to... (aarch64_ld<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode (aarch64_ld1x2<VQ:mode>): Delete. (aarch64_ld1x2<VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x2<vstruct_elt>): This. (aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_ld<nregs>_lane<vstruct_elt>): This. (aarch64_get_dreg<VSTRUCT:mode><VDC:mode>): Delete. (aarch64_get_qreg<VSTRUCT:mode><VQ:mode>): Likewise. (aarch64_st2<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st2<vstruct_elt>_dreg): This. (aarch64_st3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st3<vstruct_elt>_dreg): This. (aarch64_st4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st4<vstruct_elt>_dreg): This. (aarch64_st<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st<nregs><vstruct_elt>): This. (aarch64_st<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode iterator and rename to aarch64_st<nregs><vstruct_elt>. (aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_st<nregs>_lane<vstruct_elt>): This. (aarch64_set_qreg<VSTRUCT:mode><VQ:mode>): Delete. (aarch64_simd_ld1<mode>_x2): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld1<vstruct_elt>_x2): This. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Refactor to include new vector-tuple modes. (aarch64_classify_vector_mode): Add cases for new vector- tuple modes. (aarch64_advsimd_partial_struct_mode_p): Define. (aarch64_advsimd_full_struct_mode_p): Likewise. (aarch64_advsimd_vector_array_mode): Likewise. (aarch64_sve_data_mode): Change location in file. (aarch64_array_mode): Handle case of Neon vector-tuple modes. (aarch64_hard_regno_nregs): Handle case of partial Neon vector structures. (aarch64_classify_address): Refactor to include handling of Neon vector-tuple modes. (aarch64_print_operand): Print "d" for "%R" for a partial Neon vector structure. (aarch64_expand_vec_perm_1): Use new vector-tuple mode. (aarch64_modes_tieable_p): Prevent tieing Neon partial struct modes with scalar machines modes larger than 8 bytes. (aarch64_can_change_mode_class): Don't allow changes between partial and full Neon vector-structure modes. * config/aarch64/arm_neon.h (vst2_lane_f16): Use updated builtin and remove boiler-plate code for opaque mode. (vst2_lane_f32): Likewise. (vst2_lane_f64): Likewise. (vst2_lane_p8): Likewise. (vst2_lane_p16): Likewise. (vst2_lane_p64): Likewise. (vst2_lane_s8): Likewise. (vst2_lane_s16): Likewise. (vst2_lane_s32): Likewise. (vst2_lane_s64): Likewise. (vst2_lane_u8): Likewise. (vst2_lane_u16): Likewise. (vst2_lane_u32): Likewise. (vst2_lane_u64): Likewise. (vst2q_lane_f16): Likewise. (vst2q_lane_f32): Likewise. (vst2q_lane_f64): Likewise. (vst2q_lane_p8): Likewise. (vst2q_lane_p16): Likewise. (vst2q_lane_p64): Likewise. (vst2q_lane_s8): Likewise. (vst2q_lane_s16): Likewise. (vst2q_lane_s32): Likewise. (vst2q_lane_s64): Likewise. (vst2q_lane_u8): Likewise. (vst2q_lane_u16): Likewise. (vst2q_lane_u32): Likewise. 
(vst2q_lane_u64): Likewise. (vst3_lane_f16): Likewise. (vst3_lane_f32): Likewise. (vst3_lane_f64): Likewise. (vst3_lane_p8): Likewise. (vst3_lane_p16): Likewise. (vst3_lane_p64): Likewise. (vst3_lane_s8): Likewise. (vst3_lane_s16): Likewise. (vst3_lane_s32): Likewise. (vst3_lane_s64): Likewise. (vst3_lane_u8): Likewise. (vst3_lane_u16): Likewise. (vst3_lane_u32): Likewise. (vst3_lane_u64): Likewise. (vst3q_lane_f16): Likewise. (vst3q_lane_f32): Likewise. (vst3q_lane_f64): Likewise. (vst3q_lane_p8): Likewise. (vst3q_lane_p16): Likewise. (vst3q_lane_p64): Likewise. (vst3q_lane_s8): Likewise. (vst3q_lane_s16): Likewise. (vst3q_lane_s32): Likewise. (vst3q_lane_s64): Likewise. (vst3q_lane_u8): Likewise. (vst3q_lane_u16): Likewise. (vst3q_lane_u32): Likewise. (vst3q_lane_u64): Likewise. (vst4_lane_f16): Likewise. (vst4_lane_f32): Likewise. (vst4_lane_f64): Likewise. (vst4_lane_p8): Likewise. (vst4_lane_p16): Likewise. (vst4_lane_p64): Likewise. (vst4_lane_s8): Likewise. (vst4_lane_s16): Likewise. (vst4_lane_s32): Likewise. (vst4_lane_s64): Likewise. (vst4_lane_u8): Likewise. (vst4_lane_u16): Likewise. (vst4_lane_u32): Likewise. (vst4_lane_u64): Likewise. (vst4q_lane_f16): Likewise. (vst4q_lane_f32): Likewise. (vst4q_lane_f64): Likewise. (vst4q_lane_p8): Likewise. (vst4q_lane_p16): Likewise. (vst4q_lane_p64): Likewise. (vst4q_lane_s8): Likewise. (vst4q_lane_s16): Likewise. (vst4q_lane_s32): Likewise. (vst4q_lane_s64): Likewise. (vst4q_lane_u8): Likewise. (vst4q_lane_u16): Likewise. (vst4q_lane_u32): Likewise. (vst4q_lane_u64): Likewise. (vtbl3_s8): Likewise. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vld1_u8_x3): Likewise. (vld1_s8_x3): Likewise. (vld1_u16_x3): Likewise. (vld1_s16_x3): Likewise. (vld1_u32_x3): Likewise. (vld1_s32_x3): Likewise. (vld1_u64_x3): Likewise. (vld1_s64_x3): Likewise. (vld1_f16_x3): Likewise. (vld1_f32_x3): Likewise. (vld1_f64_x3): Likewise. (vld1_p8_x3): Likewise. (vld1_p16_x3): Likewise. (vld1_p64_x3): Likewise. (vld1q_u8_x3): Likewise. (vld1q_s8_x3): Likewise. (vld1q_u16_x3): Likewise. (vld1q_s16_x3): Likewise. (vld1q_u32_x3): Likewise. (vld1q_s32_x3): Likewise. (vld1q_u64_x3): Likewise. (vld1q_s64_x3): Likewise. (vld1q_f16_x3): Likewise. (vld1q_f32_x3): Likewise. (vld1q_f64_x3): Likewise. (vld1q_p8_x3): Likewise. (vld1q_p16_x3): Likewise. (vld1q_p64_x3): Likewise. (vld1_u8_x2): Likewise. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. (vld1_s8_x4): Likewise. (vld1q_s8_x4): Likewise. (vld1_s16_x4): Likewise. (vld1q_s16_x4): Likewise. (vld1_s32_x4): Likewise. (vld1q_s32_x4): Likewise. (vld1_u8_x4): Likewise. (vld1q_u8_x4): Likewise. (vld1_u16_x4): Likewise. (vld1q_u16_x4): Likewise. (vld1_u32_x4): Likewise. (vld1q_u32_x4): Likewise. (vld1_f16_x4): Likewise. (vld1q_f16_x4): Likewise. (vld1_f32_x4): Likewise. (vld1q_f32_x4): Likewise. (vld1_p8_x4): Likewise. 
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
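    /* Map the GCC lane number to the architectural lane number; the
       two orderings differ on big-endian targets.  */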
operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
INTVAL (operands[2]));
return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
}
[(set_attr "type" "neon_store2_one_lane<q>")]
)
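
;; The standard vec_store_lanes pattern below is what the middle end
;; uses for an interleaved store of a two-vector tuple.  The same ST2
;; instruction also underlies the vst2q intrinsics; as a rough sketch
;; (little-endian shown, function name is hypothetical):
;;
;;   #include <arm_neon.h>
;;   /* Memory receives a[0], b[0], a[1], b[1], ...  */
;;   void store_pairs (int32_t *ptr, int32x4_t a, int32x4_t b)
;;   {
;;     int32x4x2_t v = { { a, b } };
;;     vst2q_s32 (ptr, v);   /* st2 {v0.4s - v1.4s}, [x0] */
;;   }
;;
;; On big-endian targets the expander first reverses the lanes of each
;; vector in the register list so that ST2's architectural lane order
;; matches GCC's internal lane numbering.
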
(define_expand "vec_store_lanes<mode><vstruct_elt>"
[(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand")
(unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q 1 "register_operand")]
UNSPEC_ST2))]
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
{
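      /* GCC numbers vector lanes in the reverse of the architectural
	 order on big-endian, so permute the lanes of each vector in
	 the tuple into architectural order before the ST2.  */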
rtx tmp = gen_reg_rtx (<MODE>mode);
rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], tmp));
}
else
    emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], operands[1]));
  DONE;
})
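
;; A hedged usage sketch (the function name below is illustrative, not from
;; this file): at the C level the interleaving store is reachable via the
;; arm_neon.h vst2 intrinsics, e.g.
;;
;;   #include <arm_neon.h>
;;   void store_pair (int32_t *p, int32x4x2_t v) { vst2q_s32 (p, v); }
;;
;; which should funnel through gen_aarch64_simd_st2<vstruct_elt> above and
;; assemble to a single "st2 {v0.4s - v1.4s}, [x0]" on a little-endian
;; target.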
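
;; Load three 3-element structures from memory, de-interleaving the elements
;; across the three consecutive destination vectors of the register triple.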
(define_insn "aarch64_simd_ld3<vstruct_elt>"
  [(set (match_operand:VSTRUCT_3Q 0 "register_operand" "=w")
	(unspec:VSTRUCT_3Q [
	  (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand" "Utv")]
	  UNSPEC_LD3))]
"TARGET_SIMD"
"ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
[(set_attr "type" "neon_load3_3reg<q>")]
)
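
;; A hedged example (assuming arm_neon.h on a little-endian target; the
;; function name is illustrative):
;;
;;   #include <arm_neon.h>
;;   int32x4x3_t load_triple (const int32_t *p) { return vld3q_s32 (p); }
;;
;; should ultimately match this pattern and assemble to
;; "ld3 {v0.4s - v2.4s}, [x0]".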
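
;; Load a single 3-element structure from memory and replicate it to all
;; lanes of three vectors (the AArch64 LD3R instruction).  This pattern
;; backs the ld3r builtins used by the vld3_dup/vld3q_dup intrinsics.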
(define_insn "aarch64_simd_ld3r<vstruct_elt>"
[(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
(unspec:VSTRUCT_3QD [
(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
UNSPEC_LD3_DUP))]
"TARGET_SIMD"
"ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
[(set_attr "type" "neon_load3_all_lanes<q>")]
)
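
;; An illustrative C-level use of the pattern above, assuming arm_neon.h
;; (register numbers in the generated code are arbitrary):
;;
;;   uint8x8x3_t v = vld3_dup_u8 (p);   /* -> ld3r {v0.8b - v2.8b}, [x0] */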
aarch64: Add machine modes for Neon vector-tuple types Until now, GCC has used large integer machine modes (OI, CI and XI) to model Neon vector-tuple types. This is suboptimal for many reasons, the most notable are: 1) Large integer modes are opaque and modifying one vector in the tuple requires a lot of inefficient set/get gymnastics. The result is a lot of superfluous move instructions. 2) Large integer modes do not map well to types that are tuples of 64-bit vectors - we need additional zero-padding which again results in superfluous move instructions. This patch adds new machine modes that better model the C-level Neon vector-tuple types. The approach is somewhat similar to that already used for SVE vector-tuple types. All of the AArch64 backend patterns and builtins that manipulate Neon vector tuples are updated to use the new machine modes. This has the effect of significantly reducing the amount of boiler-plate code in the arm_neon.h header. While this patch increases the quality of code generated in many instances, there is still room for significant improvement - which will be attempted in subsequent patches. gcc/ChangeLog: 2021-08-09 Jonathan Wright <jonathan.wright@arm.com> Richard Sandiford <richard.sandiford@arm.com> * config/aarch64/aarch64-builtins.c (v2x8qi_UP): Define. (v2x4hi_UP): Likewise. (v2x4hf_UP): Likewise. (v2x4bf_UP): Likewise. (v2x2si_UP): Likewise. (v2x2sf_UP): Likewise. (v2x1di_UP): Likewise. (v2x1df_UP): Likewise. (v2x16qi_UP): Likewise. (v2x8hi_UP): Likewise. (v2x8hf_UP): Likewise. (v2x8bf_UP): Likewise. (v2x4si_UP): Likewise. (v2x4sf_UP): Likewise. (v2x2di_UP): Likewise. (v2x2df_UP): Likewise. (v3x8qi_UP): Likewise. (v3x4hi_UP): Likewise. (v3x4hf_UP): Likewise. (v3x4bf_UP): Likewise. (v3x2si_UP): Likewise. (v3x2sf_UP): Likewise. (v3x1di_UP): Likewise. (v3x1df_UP): Likewise. (v3x16qi_UP): Likewise. (v3x8hi_UP): Likewise. (v3x8hf_UP): Likewise. (v3x8bf_UP): Likewise. (v3x4si_UP): Likewise. (v3x4sf_UP): Likewise. (v3x2di_UP): Likewise. (v3x2df_UP): Likewise. (v4x8qi_UP): Likewise. (v4x4hi_UP): Likewise. (v4x4hf_UP): Likewise. (v4x4bf_UP): Likewise. (v4x2si_UP): Likewise. (v4x2sf_UP): Likewise. (v4x1di_UP): Likewise. (v4x1df_UP): Likewise. (v4x16qi_UP): Likewise. (v4x8hi_UP): Likewise. (v4x8hf_UP): Likewise. (v4x8bf_UP): Likewise. (v4x4si_UP): Likewise. (v4x4sf_UP): Likewise. (v4x2di_UP): Likewise. (v4x2df_UP): Likewise. (TYPES_GETREGP): Delete. (TYPES_SETREGP): Likewise. (TYPES_LOADSTRUCT_U): Define. (TYPES_LOADSTRUCT_P): Likewise. (TYPES_LOADSTRUCT_LANE_U): Likewise. (TYPES_LOADSTRUCT_LANE_P): Likewise. (TYPES_STORE1P): Move for consistency. (TYPES_STORESTRUCT_U): Define. (TYPES_STORESTRUCT_P): Likewise. (TYPES_STORESTRUCT_LANE_U): Likewise. (TYPES_STORESTRUCT_LANE_P): Likewise. (aarch64_simd_tuple_types): Define. (aarch64_lookup_simd_builtin_type): Handle tuple type lookup. (aarch64_init_simd_builtin_functions): Update frontend lookup for builtin functions after handling arm_neon.h pragma. (register_tuple_type): Manually set modes of single-integer tuple types. Record tuple types. * config/aarch64/aarch64-modes.def (ADV_SIMD_D_REG_STRUCT_MODES): Define D-register tuple modes. (ADV_SIMD_Q_REG_STRUCT_MODES): Define Q-register tuple modes. (SVE_MODES): Give single-vector modes priority over vector- tuple modes. (VECTOR_MODES_WITH_PREFIX): Set partial-vector mode order to be after all single-vector modes. * config/aarch64/aarch64-simd-builtins.def: Update builtin generator macros to reflect modifications to the backend patterns. 
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2<vstruct_elt>): This. (aarch64_simd_ld2r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2r<vstruct_elt>): This. (aarch64_vec_load_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_load_lanes<mode>_lane<vstruct_elt>): This. (vec_load_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanes<mode><vstruct_elt>): This. (aarch64_simd_st2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st2<vstruct_elt>): This. (aarch64_vec_store_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_store_lanes<mode>_lane<vstruct_elt>): This. (vec_store_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanes<mode><vstruct_elt>): This. (aarch64_simd_ld3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3<vstruct_elt>): This. (aarch64_simd_ld3r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3r<vstruct_elt>): This. (aarch64_vec_load_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesci<mode>): This. (aarch64_simd_st3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st3<vstruct_elt>): This. (aarch64_vec_store_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesci<mode>): This. (aarch64_simd_ld4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4<vstruct_elt>): This. (aarch64_simd_ld4r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4r<vstruct_elt>): This. (aarch64_vec_load_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesxi<mode>): This. (aarch64_simd_st4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st4<vstruct_elt>): This. (aarch64_vec_store_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesxi<mode>): This. (mov<mode>): Define for Neon vector-tuple modes. (aarch64_ld1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x3<vstruct_elt>): This. (aarch64_ld1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x3_<vstruct_elt>): This. (aarch64_ld1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x4<vstruct_elt>): This. (aarch64_ld1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x4_<vstruct_elt>): This. (aarch64_st1x2<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x2<vstruct_elt>): This. (aarch64_st1_x2_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x2_<vstruct_elt>): This. (aarch64_st1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x3<vstruct_elt>): This. (aarch64_st1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x3_<vstruct_elt>): This. (aarch64_st1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x4<vstruct_elt>): This. (aarch64_st1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x4_<vstruct_elt>): This. (*aarch64_mov<mode>): Define for vector-tuple modes. (*aarch64_be_mov<mode>): Likewise. (aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs>r<vstruct_elt>): This. (aarch64_ld2<mode>_dreg): Use vector-tuple mode iterator and rename to... 
(aarch64_ld2<vstruct_elt>_dreg): This. (aarch64_ld3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld3<vstruct_elt>_dreg): This. (aarch64_ld4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld4<vstruct_elt>_dreg): This. (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs><vstruct_elt>): Use vector-tuple mode iterator and rename to... (aarch64_ld<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode (aarch64_ld1x2<VQ:mode>): Delete. (aarch64_ld1x2<VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x2<vstruct_elt>): This. (aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_ld<nregs>_lane<vstruct_elt>): This. (aarch64_get_dreg<VSTRUCT:mode><VDC:mode>): Delete. (aarch64_get_qreg<VSTRUCT:mode><VQ:mode>): Likewise. (aarch64_st2<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st2<vstruct_elt>_dreg): This. (aarch64_st3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st3<vstruct_elt>_dreg): This. (aarch64_st4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st4<vstruct_elt>_dreg): This. (aarch64_st<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st<nregs><vstruct_elt>): This. (aarch64_st<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode iterator and rename to aarch64_st<nregs><vstruct_elt>. (aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_st<nregs>_lane<vstruct_elt>): This. (aarch64_set_qreg<VSTRUCT:mode><VQ:mode>): Delete. (aarch64_simd_ld1<mode>_x2): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld1<vstruct_elt>_x2): This. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Refactor to include new vector-tuple modes. (aarch64_classify_vector_mode): Add cases for new vector- tuple modes. (aarch64_advsimd_partial_struct_mode_p): Define. (aarch64_advsimd_full_struct_mode_p): Likewise. (aarch64_advsimd_vector_array_mode): Likewise. (aarch64_sve_data_mode): Change location in file. (aarch64_array_mode): Handle case of Neon vector-tuple modes. (aarch64_hard_regno_nregs): Handle case of partial Neon vector structures. (aarch64_classify_address): Refactor to include handling of Neon vector-tuple modes. (aarch64_print_operand): Print "d" for "%R" for a partial Neon vector structure. (aarch64_expand_vec_perm_1): Use new vector-tuple mode. (aarch64_modes_tieable_p): Prevent tieing Neon partial struct modes with scalar machines modes larger than 8 bytes. (aarch64_can_change_mode_class): Don't allow changes between partial and full Neon vector-structure modes. * config/aarch64/arm_neon.h (vst2_lane_f16): Use updated builtin and remove boiler-plate code for opaque mode. (vst2_lane_f32): Likewise. (vst2_lane_f64): Likewise. (vst2_lane_p8): Likewise. (vst2_lane_p16): Likewise. (vst2_lane_p64): Likewise. (vst2_lane_s8): Likewise. (vst2_lane_s16): Likewise. (vst2_lane_s32): Likewise. (vst2_lane_s64): Likewise. (vst2_lane_u8): Likewise. (vst2_lane_u16): Likewise. (vst2_lane_u32): Likewise. (vst2_lane_u64): Likewise. (vst2q_lane_f16): Likewise. (vst2q_lane_f32): Likewise. (vst2q_lane_f64): Likewise. (vst2q_lane_p8): Likewise. (vst2q_lane_p16): Likewise. (vst2q_lane_p64): Likewise. (vst2q_lane_s8): Likewise. (vst2q_lane_s16): Likewise. (vst2q_lane_s32): Likewise. (vst2q_lane_s64): Likewise. (vst2q_lane_u8): Likewise. (vst2q_lane_u16): Likewise. (vst2q_lane_u32): Likewise. 
(vst2q_lane_u64): Likewise. (vst3_lane_f16): Likewise. (vst3_lane_f32): Likewise. (vst3_lane_f64): Likewise. (vst3_lane_p8): Likewise. (vst3_lane_p16): Likewise. (vst3_lane_p64): Likewise. (vst3_lane_s8): Likewise. (vst3_lane_s16): Likewise. (vst3_lane_s32): Likewise. (vst3_lane_s64): Likewise. (vst3_lane_u8): Likewise. (vst3_lane_u16): Likewise. (vst3_lane_u32): Likewise. (vst3_lane_u64): Likewise. (vst3q_lane_f16): Likewise. (vst3q_lane_f32): Likewise. (vst3q_lane_f64): Likewise. (vst3q_lane_p8): Likewise. (vst3q_lane_p16): Likewise. (vst3q_lane_p64): Likewise. (vst3q_lane_s8): Likewise. (vst3q_lane_s16): Likewise. (vst3q_lane_s32): Likewise. (vst3q_lane_s64): Likewise. (vst3q_lane_u8): Likewise. (vst3q_lane_u16): Likewise. (vst3q_lane_u32): Likewise. (vst3q_lane_u64): Likewise. (vst4_lane_f16): Likewise. (vst4_lane_f32): Likewise. (vst4_lane_f64): Likewise. (vst4_lane_p8): Likewise. (vst4_lane_p16): Likewise. (vst4_lane_p64): Likewise. (vst4_lane_s8): Likewise. (vst4_lane_s16): Likewise. (vst4_lane_s32): Likewise. (vst4_lane_s64): Likewise. (vst4_lane_u8): Likewise. (vst4_lane_u16): Likewise. (vst4_lane_u32): Likewise. (vst4_lane_u64): Likewise. (vst4q_lane_f16): Likewise. (vst4q_lane_f32): Likewise. (vst4q_lane_f64): Likewise. (vst4q_lane_p8): Likewise. (vst4q_lane_p16): Likewise. (vst4q_lane_p64): Likewise. (vst4q_lane_s8): Likewise. (vst4q_lane_s16): Likewise. (vst4q_lane_s32): Likewise. (vst4q_lane_s64): Likewise. (vst4q_lane_u8): Likewise. (vst4q_lane_u16): Likewise. (vst4q_lane_u32): Likewise. (vst4q_lane_u64): Likewise. (vtbl3_s8): Likewise. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vld1_u8_x3): Likewise. (vld1_s8_x3): Likewise. (vld1_u16_x3): Likewise. (vld1_s16_x3): Likewise. (vld1_u32_x3): Likewise. (vld1_s32_x3): Likewise. (vld1_u64_x3): Likewise. (vld1_s64_x3): Likewise. (vld1_f16_x3): Likewise. (vld1_f32_x3): Likewise. (vld1_f64_x3): Likewise. (vld1_p8_x3): Likewise. (vld1_p16_x3): Likewise. (vld1_p64_x3): Likewise. (vld1q_u8_x3): Likewise. (vld1q_s8_x3): Likewise. (vld1q_u16_x3): Likewise. (vld1q_s16_x3): Likewise. (vld1q_u32_x3): Likewise. (vld1q_s32_x3): Likewise. (vld1q_u64_x3): Likewise. (vld1q_s64_x3): Likewise. (vld1q_f16_x3): Likewise. (vld1q_f32_x3): Likewise. (vld1q_f64_x3): Likewise. (vld1q_p8_x3): Likewise. (vld1q_p16_x3): Likewise. (vld1q_p64_x3): Likewise. (vld1_u8_x2): Likewise. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. (vld1_s8_x4): Likewise. (vld1q_s8_x4): Likewise. (vld1_s16_x4): Likewise. (vld1q_s16_x4): Likewise. (vld1_s32_x4): Likewise. (vld1q_s32_x4): Likewise. (vld1_u8_x4): Likewise. (vld1q_u8_x4): Likewise. (vld1_u16_x4): Likewise. (vld1q_u16_x4): Likewise. (vld1_u32_x4): Likewise. (vld1q_u32_x4): Likewise. (vld1_f16_x4): Likewise. (vld1q_f16_x4): Likewise. (vld1_f32_x4): Likewise. (vld1q_f32_x4): Likewise. (vld1_p8_x4): Likewise. 
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
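;; Load a single three-element structure from memory into one lane of
;; three vector registers (the single-structure form of LD3).  The
;; destination tuple is tied to operand 2, so all other lanes are
;; preserved; operand 3 is the lane index.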
(define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
[(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
(unspec:VSTRUCT_3QD [
(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
(match_operand:VSTRUCT_3QD 2 "register_operand" "0")
(match_operand:SI 3 "immediate_operand" "i")]
UNSPEC_LD3_LANE))]
"TARGET_SIMD"
{
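  /* Map the lane index from GCC's lane numbering to the architectural
     numbering; the two differ on big-endian targets.  */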
operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
INTVAL (operands[3]));
return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
[(set_attr "type" "neon_load3_one_lane")]
)
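;; As an illustrative example (the mode and registers below are chosen
;; for exposition, not taken from generated code): with the V3x4SI
;; tuple mode and lane index 1, the template above would print
;;
;;	ld3	{v4.4s - v6.4s}[1], [x0]
;;
;; %S0 and %U0 expand to the first and third registers of the tuple
;; allocated to operand 0, and aarch64_endian_lane_rtx has already
;; adjusted the lane index for big-endian targets.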
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
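;; Standard vec_load_lanes pattern for a three-vector interleaved load
;; (LD3) into a Q-register vector-tuple mode.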
(define_expand "vec_load_lanes<mode><vstruct_elt>"
[(set (match_operand:VSTRUCT_3Q 0 "register_operand")
(unspec:VSTRUCT_3Q [
(match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand")]
UNSPEC_LD3))]
"TARGET_SIMD"
{
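  /* The architectural lane ordering of LD3 does not match GCC's element
     ordering on big-endian targets, so load into a temporary and then
     reverse the elements of each vector in the tuple.  */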
if (BYTES_BIG_ENDIAN)
{
rtx tmp = gen_reg_rtx (<MODE>mode);
rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[1]));
emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
}
else
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
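/* Emit one LD3 to load all three vectors of the tuple from memory.  */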
emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[0], operands[1]));
DONE;
})
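
;; Store a triple of Q-register vectors to memory with a single ST3.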
(define_insn "aarch64_simd_st3<vstruct_elt>"
[(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q 1 "register_operand" "w")]
UNSPEC_ST3))]
"TARGET_SIMD"
"st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
[(set_attr "type" "neon_store3_3reg<q>")]
)
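
;; A usage sketch (register numbers are hypothetical): the arm_neon.h
;; intrinsic vst3q_s32, which stores an int32x4x3_t, is expected to match
;; this pattern and emit a single instruction such as
;;   st3 {v0.4s - v2.4s}, [x0]
;; where %S1 and %U1 select the first and third registers of the tuple.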
;; RTL uses GCC vector extension indices throughout, so flip to the
;; memory lane number only when printing the assembly operand.
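;; For example, on a big-endian target lane 0 of a four-element vector in
;; RTL is printed as lane 3 in the assembly; on little-endian the index
;; is unchanged.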
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
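As a concrete illustration of what the new vector-tuple modes buy (a hedged sketch, not part of aarch64-simd.md itself): the arm_neon.h tuple type int8x16x3_t now maps directly onto the V3x16QI machine mode, so a lane store from a three-vector tuple no longer has to be assembled into an opaque CI-mode value first. The function name store_lane3 below is illustrative only; vst3q_lane_s8 is the real intrinsic.

/* Minimal sketch, assuming an AArch64 target and a GCC with this patch
   applied.  "store_lane3" is a hypothetical example, not part of GCC.  */
#include <arm_neon.h>

void
store_lane3 (int8_t *out, int8x16x3_t val)
{
  /* Expands to the st3-lane builtin, which is matched by the
     VSTRUCT_3QD define_insn that follows; at -O2 this should emit a
     single "st3 {v0.b - v2.b}[3], [x0]"-style instruction with no
     extra register moves.  */
  vst3q_lane_s8 (out, val, 3);
}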
(define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")
                     (match_operand:SI 2 "immediate_operand" "i")]
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
		    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
					   INTVAL (operands[2]));
    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
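
;; Note that aarch64_endian_lane_rtx canonicalizes the lane number before
;; it is printed: on big-endian targets the architectural lane order is
;; reversed relative to GCC's internal numbering, so the index is flipped.
;;
;; A minimal C-level sketch of how this pattern is exercised (illustrative
;; only: the intrinsic and its signature come from arm_neon.h, while the
;; function name and the register numbers in the comment are assumptions):
;;
;;   #include <arm_neon.h>
;;
;;   void
;;   store_lane1 (int32_t *p, int32x4x3_t v)
;;   {
;;     /* Expands through UNSPEC_ST3_LANE; expected to assemble to
;;        something like: st3 {v0.s - v2.s}[1], [x0].  */
;;     vst3q_lane_s32 (p, v, 1);
;;   }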
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
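;; The vec_store_lanes patterns implement the store-lanes optabs that the
;; auto-vectorizer uses for interleaved stores.  As an illustration (a
;; hypothetical example, not code from this file), a C loop like the sketch
;; below performs a 3-way interleaved store and can be vectorized through
;; this pattern into a single ST3 of a three-vector tuple:
;;
;;   void
;;   interleave3 (int *dst, const int *a, const int *b, const int *c, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       {
;;         dst[3 * i + 0] = a[i];   /* lane 0 of each group of three */
;;         dst[3 * i + 1] = b[i];   /* lane 1 */
;;         dst[3 * i + 2] = c[i];   /* lane 2 */
;;       }
;;   }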
(define_expand "vec_store_lanes<mode><vstruct_elt>"
[(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand")
(unspec:VSTRUCT_3Q [
(match_operand:VSTRUCT_3Q 1 "register_operand")]
UNSPEC_ST3))]
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
{
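      /* GCC's in-register lane numbering is the reverse of the
	 architectural numbering on big-endian, so first permute the
	 elements of each vector in the tuple into architectural order
	 before emitting the ST3.  */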
rtx tmp = gen_reg_rtx (<MODE>mode);
rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], tmp));
}
else
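    /* On little-endian the register layout already matches the in-memory
       lane order, so no permute is needed before the store.  */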
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
  emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], operands[1]));
  DONE;
})
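
;; Usage sketch (illustrative commentary, not part of the GCC source):
;; user code reaches the st3 expander above through the vst3q intrinsics
;; in arm_neon.h.  The C function below is hypothetical; register numbers
;; in the quoted assembly depend on allocation.
;;
;;   #include <arm_neon.h>
;;
;;   void
;;   store_interleaved (int32_t *ptr, int32x4x3_t val)
;;   {
;;     /* Lowers through the st3 expander to a single structure store
;;        of the form "st3 {v0.4s - v2.4s}, [x0]".  */
;;     vst3q_s32 (ptr, val);
;;   }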
(define_insn "aarch64_simd_ld4<vstruct_elt>"
  [(set (match_operand:VSTRUCT_4Q 0 "register_operand" "=w")
        (unspec:VSTRUCT_4Q [
          (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD4))]
"TARGET_SIMD"
"ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
[(set_attr "type" "neon_load4_4reg<q>")]
)
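
;; Illustrative counterpart in C (hypothetical example, not from the GCC
;; source): the vld4q intrinsics in arm_neon.h map onto this ld4 pattern,
;; loading and de-interleaving four Q registers in one instruction.
;;
;;   #include <arm_neon.h>
;;
;;   float32x4x4_t
;;   load_interleaved (const float32_t *ptr)
;;   {
;;     /* Matches the define_insn above; assembles to something like
;;        "ld4 {v0.4s - v3.4s}, [x0]".  */
;;     return vld4q_f32 (ptr);
;;   }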
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
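;; Load a single 4-element structure from memory and replicate it to all
;; lanes of four vectors: a single LD4R instruction.  VSTRUCT_4QD iterates
;; over the four-vector tuple modes for both 64-bit (D) and 128-bit (Q)
;; vectors.  The arm_neon.h vld4_dup/vld4q_dup intrinsics reach this
;; pattern through the aarch64_ld4r<vstruct_elt> expander.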
(define_insn "aarch64_simd_ld4r<vstruct_elt>"
  [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
        (unspec:VSTRUCT_4QD [
          (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD4_DUP))]
"TARGET_SIMD"
"ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
[(set_attr "type" "neon_load4_all_lanes<q>")]
)
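;; A minimal usage sketch (the helper name and register choices below are
;; illustrative assumptions, not taken from this file):
;;
;;   #include <arm_neon.h>
;;
;;   float32x4x4_t
;;   load_dup4 (const float32_t *p)
;;   {
;;     /* Load p[0..3] once and replicate element i across all lanes of
;;        the i'th result vector.  */
;;     return vld4q_dup_f32 (p);
;;   }
;;
;; With p in x0 and the result returned in v0-v3, this can compile to the
;; single instruction "ld4r {v0.4s - v3.4s}, [x0]".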
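
;; Load one 4-element structure from memory into a single lane of four
;; vectors (the single-structure form of LD4).  Operand 2 supplies the
;; existing register contents, so every other lane passes through
;; unchanged (hence the tied "0" constraint), and operand 3 is the
;; immediate lane index.  This is the pattern behind the
;; vld4_lane/vld4q_lane intrinsics.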
(define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
  [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
        (unspec:VSTRUCT_4QD [
          (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
          (match_operand:VSTRUCT_4QD 2 "register_operand" "0")
          (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_LD4_LANE))]
  "TARGET_SIMD"
{
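  /* The immediate lane index in operand 3 is adjusted for big-endian
     lane ordering before the ld4 output template is emitted (the
     endianness reversal introduced for PR target/63870).  */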
operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
INTVAL (operands[3]));
return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
}
[(set_attr "type" "neon_load4_one_lane")]
)
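;; A minimal usage sketch (an illustrative assumption, not part of this
;; file): the vld4q_lane_s32 intrinsic from arm_neon.h expands to the
;; lane-load pattern above, reloading one lane of each vector in a
;; four-vector tuple from memory:
;;
;;   #include <arm_neon.h>
;;
;;   int32x4x4_t
;;   reload_lane1 (const int32_t *p, int32x4x4_t acc)   /* hypothetical */
;;   {
;;     /* Loads p[0..3] into lane 1 of acc.val[0..3]; emits
;;        ld4 {v0.s - v3.s}[1], [x0] (modulo register allocation).  */
;;     return vld4q_lane_s32 (p, acc, 1);
;;   }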
2021-08-09 16:26:48 +02:00
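;; vec_load_lanes expander for four-register Q-form vector tuples
;; (VSTRUCT_4Q).  This maps the middle-end's interleaved-load optab onto
;; the Advanced SIMD LD4 structure load.  On big-endian targets the
;; elements of each loaded vector must additionally be reversed, as the
;; expander body below arranges.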
(define_expand "vec_load_lanes<mode><vstruct_elt>"
[(set (match_operand:VSTRUCT_4Q 0 "register_operand")
(unspec:VSTRUCT_4Q [
(match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand")]
UNSPEC_LD4))]
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
{
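      /* LD4's architectural lane ordering does not match GCC's in-memory
	 lane numbering on big-endian targets, so load into a temporary
	 register list first and then reverse the elements of each vector
	 in the list.  */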
rtx tmp = gen_reg_rtx (<MODE>mode);
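      /* Elements per vector: the total unit count of the tuple mode
	 divided by the number of vectors (<nregs>) in the list.  */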
rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[1]));
emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
}
else
emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[0], operands[1]));
DONE;
})
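;; A usage sketch, added here as an illustration and not part of the
;; original file: the arm_neon.h intrinsic vld4q_s32 ends up in an
;; expander like the one above, which emits the
;; aarch64_simd_ld4<vstruct_elt> pattern directly.
;;
;;   #include <arm_neon.h>
;;   int32x4x4_t
;;   load4 (const int32_t *p)
;;   {
;;     return vld4q_s32 (p);	/* Expands to a single LD4.  */
;;   }
;;
;; On little-endian this should assemble to "ld4 {v0.4s - v3.4s}, [x0]".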
(define_insn "aarch64_simd_st4<vstruct_elt>"
[(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:VSTRUCT_4Q [
(match_operand:VSTRUCT_4Q 1 "register_operand" "w")]
UNSPEC_ST4))]
"TARGET_SIMD"
"st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
[(set_attr "type" "neon_store4_4reg<q>")]
)
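;; An illustrative note (an assumption about typical codegen, not taken
;; from this file): %S1 and %V1 print the first and the last of the four
;; consecutive vector registers holding operand 1, so for example
;;
;;   #include <arm_neon.h>
;;   void
;;   store4 (int32_t *p, int32x4x4_t v)
;;   {
;;     vst4q_s32 (p, v);	/* Expands to a single ST4.  */
;;   }
;;
;; should assemble to "st4 {v0.4s - v3.4s}, [x0]".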
;; RTL uses GCC vector extension indices, so flip only for assembly.
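;; For example, assuming the usual big-endian mapping
;; ENDIAN_LANE_N (nunits, n) == nunits - 1 - n, lane 1 of a V4SI value
;; in RTL is printed as assembly lane 2 (4 - 1 - 1 == 2); the RTL
;; itself keeps the GCC numbering.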
2021-08-09 16:26:48 +02:00
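;; Store a single lane from each vector of a four-vector tuple (the ST4
;; single-structure form).  Operand 0 is the destination memory, operand 1
;; the vector-tuple register and operand 2 the immediate lane index.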
(define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")
                     (match_operand:SI 2 "immediate_operand" "i")]
UNSPEC_ST4_LANE))]
"TARGET_SIMD"
{
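    /* RTL lane numbers follow the memory (array) order of the elements;
       convert to the architectural lane number, which is reversed on
       big-endian targets.  */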
operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
INTVAL (operands[2]));
return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
}
[(set_attr "type" "neon_store4_one_lane<q>")]
)
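
;; The standard vec_store_lanes optab pattern for a tuple of four
;; Q-register vectors: ST4 interleaves the elements of the four input
;; vectors on their way out to memory.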
aarch64: Add machine modes for Neon vector-tuple types Until now, GCC has used large integer machine modes (OI, CI and XI) to model Neon vector-tuple types. This is suboptimal for many reasons, the most notable are: 1) Large integer modes are opaque and modifying one vector in the tuple requires a lot of inefficient set/get gymnastics. The result is a lot of superfluous move instructions. 2) Large integer modes do not map well to types that are tuples of 64-bit vectors - we need additional zero-padding which again results in superfluous move instructions. This patch adds new machine modes that better model the C-level Neon vector-tuple types. The approach is somewhat similar to that already used for SVE vector-tuple types. All of the AArch64 backend patterns and builtins that manipulate Neon vector tuples are updated to use the new machine modes. This has the effect of significantly reducing the amount of boiler-plate code in the arm_neon.h header. While this patch increases the quality of code generated in many instances, there is still room for significant improvement - which will be attempted in subsequent patches. gcc/ChangeLog: 2021-08-09 Jonathan Wright <jonathan.wright@arm.com> Richard Sandiford <richard.sandiford@arm.com> * config/aarch64/aarch64-builtins.c (v2x8qi_UP): Define. (v2x4hi_UP): Likewise. (v2x4hf_UP): Likewise. (v2x4bf_UP): Likewise. (v2x2si_UP): Likewise. (v2x2sf_UP): Likewise. (v2x1di_UP): Likewise. (v2x1df_UP): Likewise. (v2x16qi_UP): Likewise. (v2x8hi_UP): Likewise. (v2x8hf_UP): Likewise. (v2x8bf_UP): Likewise. (v2x4si_UP): Likewise. (v2x4sf_UP): Likewise. (v2x2di_UP): Likewise. (v2x2df_UP): Likewise. (v3x8qi_UP): Likewise. (v3x4hi_UP): Likewise. (v3x4hf_UP): Likewise. (v3x4bf_UP): Likewise. (v3x2si_UP): Likewise. (v3x2sf_UP): Likewise. (v3x1di_UP): Likewise. (v3x1df_UP): Likewise. (v3x16qi_UP): Likewise. (v3x8hi_UP): Likewise. (v3x8hf_UP): Likewise. (v3x8bf_UP): Likewise. (v3x4si_UP): Likewise. (v3x4sf_UP): Likewise. (v3x2di_UP): Likewise. (v3x2df_UP): Likewise. (v4x8qi_UP): Likewise. (v4x4hi_UP): Likewise. (v4x4hf_UP): Likewise. (v4x4bf_UP): Likewise. (v4x2si_UP): Likewise. (v4x2sf_UP): Likewise. (v4x1di_UP): Likewise. (v4x1df_UP): Likewise. (v4x16qi_UP): Likewise. (v4x8hi_UP): Likewise. (v4x8hf_UP): Likewise. (v4x8bf_UP): Likewise. (v4x4si_UP): Likewise. (v4x4sf_UP): Likewise. (v4x2di_UP): Likewise. (v4x2df_UP): Likewise. (TYPES_GETREGP): Delete. (TYPES_SETREGP): Likewise. (TYPES_LOADSTRUCT_U): Define. (TYPES_LOADSTRUCT_P): Likewise. (TYPES_LOADSTRUCT_LANE_U): Likewise. (TYPES_LOADSTRUCT_LANE_P): Likewise. (TYPES_STORE1P): Move for consistency. (TYPES_STORESTRUCT_U): Define. (TYPES_STORESTRUCT_P): Likewise. (TYPES_STORESTRUCT_LANE_U): Likewise. (TYPES_STORESTRUCT_LANE_P): Likewise. (aarch64_simd_tuple_types): Define. (aarch64_lookup_simd_builtin_type): Handle tuple type lookup. (aarch64_init_simd_builtin_functions): Update frontend lookup for builtin functions after handling arm_neon.h pragma. (register_tuple_type): Manually set modes of single-integer tuple types. Record tuple types. * config/aarch64/aarch64-modes.def (ADV_SIMD_D_REG_STRUCT_MODES): Define D-register tuple modes. (ADV_SIMD_Q_REG_STRUCT_MODES): Define Q-register tuple modes. (SVE_MODES): Give single-vector modes priority over vector- tuple modes. (VECTOR_MODES_WITH_PREFIX): Set partial-vector mode order to be after all single-vector modes. * config/aarch64/aarch64-simd-builtins.def: Update builtin generator macros to reflect modifications to the backend patterns. 
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2<vstruct_elt>): This. (aarch64_simd_ld2r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2r<vstruct_elt>): This. (aarch64_vec_load_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_load_lanes<mode>_lane<vstruct_elt>): This. (vec_load_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanes<mode><vstruct_elt>): This. (aarch64_simd_st2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st2<vstruct_elt>): This. (aarch64_vec_store_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_store_lanes<mode>_lane<vstruct_elt>): This. (vec_store_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanes<mode><vstruct_elt>): This. (aarch64_simd_ld3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3<vstruct_elt>): This. (aarch64_simd_ld3r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3r<vstruct_elt>): This. (aarch64_vec_load_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesci<mode>): This. (aarch64_simd_st3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st3<vstruct_elt>): This. (aarch64_vec_store_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesci<mode>): This. (aarch64_simd_ld4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4<vstruct_elt>): This. (aarch64_simd_ld4r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4r<vstruct_elt>): This. (aarch64_vec_load_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesxi<mode>): This. (aarch64_simd_st4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st4<vstruct_elt>): This. (aarch64_vec_store_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesxi<mode>): This. (mov<mode>): Define for Neon vector-tuple modes. (aarch64_ld1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x3<vstruct_elt>): This. (aarch64_ld1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x3_<vstruct_elt>): This. (aarch64_ld1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x4<vstruct_elt>): This. (aarch64_ld1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x4_<vstruct_elt>): This. (aarch64_st1x2<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x2<vstruct_elt>): This. (aarch64_st1_x2_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x2_<vstruct_elt>): This. (aarch64_st1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x3<vstruct_elt>): This. (aarch64_st1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x3_<vstruct_elt>): This. (aarch64_st1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x4<vstruct_elt>): This. (aarch64_st1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x4_<vstruct_elt>): This. (*aarch64_mov<mode>): Define for vector-tuple modes. (*aarch64_be_mov<mode>): Likewise. (aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs>r<vstruct_elt>): This. (aarch64_ld2<mode>_dreg): Use vector-tuple mode iterator and rename to... 
(aarch64_ld2<vstruct_elt>_dreg): This. (aarch64_ld3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld3<vstruct_elt>_dreg): This. (aarch64_ld4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld4<vstruct_elt>_dreg): This. (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs><vstruct_elt>): Use vector-tuple mode iterator and rename to... (aarch64_ld<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode (aarch64_ld1x2<VQ:mode>): Delete. (aarch64_ld1x2<VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x2<vstruct_elt>): This. (aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_ld<nregs>_lane<vstruct_elt>): This. (aarch64_get_dreg<VSTRUCT:mode><VDC:mode>): Delete. (aarch64_get_qreg<VSTRUCT:mode><VQ:mode>): Likewise. (aarch64_st2<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st2<vstruct_elt>_dreg): This. (aarch64_st3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st3<vstruct_elt>_dreg): This. (aarch64_st4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st4<vstruct_elt>_dreg): This. (aarch64_st<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st<nregs><vstruct_elt>): This. (aarch64_st<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode iterator and rename to aarch64_st<nregs><vstruct_elt>. (aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_st<nregs>_lane<vstruct_elt>): This. (aarch64_set_qreg<VSTRUCT:mode><VQ:mode>): Delete. (aarch64_simd_ld1<mode>_x2): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld1<vstruct_elt>_x2): This. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Refactor to include new vector-tuple modes. (aarch64_classify_vector_mode): Add cases for new vector- tuple modes. (aarch64_advsimd_partial_struct_mode_p): Define. (aarch64_advsimd_full_struct_mode_p): Likewise. (aarch64_advsimd_vector_array_mode): Likewise. (aarch64_sve_data_mode): Change location in file. (aarch64_array_mode): Handle case of Neon vector-tuple modes. (aarch64_hard_regno_nregs): Handle case of partial Neon vector structures. (aarch64_classify_address): Refactor to include handling of Neon vector-tuple modes. (aarch64_print_operand): Print "d" for "%R" for a partial Neon vector structure. (aarch64_expand_vec_perm_1): Use new vector-tuple mode. (aarch64_modes_tieable_p): Prevent tieing Neon partial struct modes with scalar machines modes larger than 8 bytes. (aarch64_can_change_mode_class): Don't allow changes between partial and full Neon vector-structure modes. * config/aarch64/arm_neon.h (vst2_lane_f16): Use updated builtin and remove boiler-plate code for opaque mode. (vst2_lane_f32): Likewise. (vst2_lane_f64): Likewise. (vst2_lane_p8): Likewise. (vst2_lane_p16): Likewise. (vst2_lane_p64): Likewise. (vst2_lane_s8): Likewise. (vst2_lane_s16): Likewise. (vst2_lane_s32): Likewise. (vst2_lane_s64): Likewise. (vst2_lane_u8): Likewise. (vst2_lane_u16): Likewise. (vst2_lane_u32): Likewise. (vst2_lane_u64): Likewise. (vst2q_lane_f16): Likewise. (vst2q_lane_f32): Likewise. (vst2q_lane_f64): Likewise. (vst2q_lane_p8): Likewise. (vst2q_lane_p16): Likewise. (vst2q_lane_p64): Likewise. (vst2q_lane_s8): Likewise. (vst2q_lane_s16): Likewise. (vst2q_lane_s32): Likewise. (vst2q_lane_s64): Likewise. (vst2q_lane_u8): Likewise. (vst2q_lane_u16): Likewise. (vst2q_lane_u32): Likewise. 
(vst2q_lane_u64): Likewise. (vst3_lane_f16): Likewise. (vst3_lane_f32): Likewise. (vst3_lane_f64): Likewise. (vst3_lane_p8): Likewise. (vst3_lane_p16): Likewise. (vst3_lane_p64): Likewise. (vst3_lane_s8): Likewise. (vst3_lane_s16): Likewise. (vst3_lane_s32): Likewise. (vst3_lane_s64): Likewise. (vst3_lane_u8): Likewise. (vst3_lane_u16): Likewise. (vst3_lane_u32): Likewise. (vst3_lane_u64): Likewise. (vst3q_lane_f16): Likewise. (vst3q_lane_f32): Likewise. (vst3q_lane_f64): Likewise. (vst3q_lane_p8): Likewise. (vst3q_lane_p16): Likewise. (vst3q_lane_p64): Likewise. (vst3q_lane_s8): Likewise. (vst3q_lane_s16): Likewise. (vst3q_lane_s32): Likewise. (vst3q_lane_s64): Likewise. (vst3q_lane_u8): Likewise. (vst3q_lane_u16): Likewise. (vst3q_lane_u32): Likewise. (vst3q_lane_u64): Likewise. (vst4_lane_f16): Likewise. (vst4_lane_f32): Likewise. (vst4_lane_f64): Likewise. (vst4_lane_p8): Likewise. (vst4_lane_p16): Likewise. (vst4_lane_p64): Likewise. (vst4_lane_s8): Likewise. (vst4_lane_s16): Likewise. (vst4_lane_s32): Likewise. (vst4_lane_s64): Likewise. (vst4_lane_u8): Likewise. (vst4_lane_u16): Likewise. (vst4_lane_u32): Likewise. (vst4_lane_u64): Likewise. (vst4q_lane_f16): Likewise. (vst4q_lane_f32): Likewise. (vst4q_lane_f64): Likewise. (vst4q_lane_p8): Likewise. (vst4q_lane_p16): Likewise. (vst4q_lane_p64): Likewise. (vst4q_lane_s8): Likewise. (vst4q_lane_s16): Likewise. (vst4q_lane_s32): Likewise. (vst4q_lane_s64): Likewise. (vst4q_lane_u8): Likewise. (vst4q_lane_u16): Likewise. (vst4q_lane_u32): Likewise. (vst4q_lane_u64): Likewise. (vtbl3_s8): Likewise. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vld1_u8_x3): Likewise. (vld1_s8_x3): Likewise. (vld1_u16_x3): Likewise. (vld1_s16_x3): Likewise. (vld1_u32_x3): Likewise. (vld1_s32_x3): Likewise. (vld1_u64_x3): Likewise. (vld1_s64_x3): Likewise. (vld1_f16_x3): Likewise. (vld1_f32_x3): Likewise. (vld1_f64_x3): Likewise. (vld1_p8_x3): Likewise. (vld1_p16_x3): Likewise. (vld1_p64_x3): Likewise. (vld1q_u8_x3): Likewise. (vld1q_s8_x3): Likewise. (vld1q_u16_x3): Likewise. (vld1q_s16_x3): Likewise. (vld1q_u32_x3): Likewise. (vld1q_s32_x3): Likewise. (vld1q_u64_x3): Likewise. (vld1q_s64_x3): Likewise. (vld1q_f16_x3): Likewise. (vld1q_f32_x3): Likewise. (vld1q_f64_x3): Likewise. (vld1q_p8_x3): Likewise. (vld1q_p16_x3): Likewise. (vld1q_p64_x3): Likewise. (vld1_u8_x2): Likewise. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. (vld1_s8_x4): Likewise. (vld1q_s8_x4): Likewise. (vld1_s16_x4): Likewise. (vld1q_s16_x4): Likewise. (vld1_s32_x4): Likewise. (vld1q_s32_x4): Likewise. (vld1_u8_x4): Likewise. (vld1q_u8_x4): Likewise. (vld1_u16_x4): Likewise. (vld1q_u16_x4): Likewise. (vld1_u32_x4): Likewise. (vld1q_u32_x4): Likewise. (vld1_f16_x4): Likewise. (vld1q_f16_x4): Likewise. (vld1_f32_x4): Likewise. (vld1q_f32_x4): Likewise. (vld1_p8_x4): Likewise. 
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
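;; Expander for the vec_store_lanes optab on four-Q-register tuples: an
;; interleaving ST4 store of a tuple of four <VSTRUCT_ELT> vectors.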
(define_expand "vec_store_lanes<mode><vstruct_elt>"
[(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand")
(unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q 1 "register_operand")]
UNSPEC_ST4))]
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
{
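      /* GCC's register lane numbering is reversed with respect to the
	 architectural lane order on big-endian targets, so reverse the
	 lanes of each vector in the tuple before the ST4 so that the
	 in-memory order is the one the optab expects.  */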
      rtx tmp = gen_reg_rtx (<MODE>mode);
      rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
		   GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
      emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], operands[1]));
  DONE;
})
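
;; Reverse the lane order of each vector in a register-list operand,
;; under control of the permute mask in operand 2; used by the
;; big-endian path of the store-lanes expanders above.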
(define_insn_and_split "aarch64_rev_reglist<mode>"
  [(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
	(unspec:VSTRUCT_QD
	  [(match_operand:VSTRUCT_QD 1 "register_operand" "w")
	   (match_operand:V16QI 2 "register_operand" "w")]
	  UNSPEC_REV_REGLIST))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i;
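  /* Number of 128-bit vector registers spanned by the tuple mode.  */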
  int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
  for (i = 0; i < nregs; i++)
    {
      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
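      /* Permute each 128-bit register with a one-register TBL,
	 reusing the same control vector (operands[2]) for every
	 step.  */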
      emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
    }
  DONE;
}
  [(set_attr "type" "neon_tbl1_q")
   (set_attr "length" "<insn_count>")]
)
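
;; A minimal sketch of what the split above emits, assuming a purely
;; illustrative register allocation for a two-register Q tuple
;; (operand 0 in v0-v1, operand 1 in v2-v3, control vector in v4):
;;
;;	tbl	v0.16b, {v2.16b}, v4.16b
;;	tbl	v1.16b, {v3.16b}, v4.16b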
;; Reload patterns for AdvSIMD register list operands.
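;; A "mov<mode>" expander has to accept any nonimmediate destination
;; and any general source, but AArch64 has no memory-to-memory move.
;; While pseudos can still be created, the expanders below therefore
;; force the source into a register whenever the destination is not a
;; REG, leaving at most one memory operand for the concrete move
;; patterns (*aarch64_mov<mode> and *aarch64_be_mov<mode>) to match.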
(define_expand "mov<mode>"
[(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
(match_operand:VSTRUCT_QD 1 "general_operand"))]
"TARGET_SIMD"
{
if (can_create_pseudo_p ())
{
if (GET_CODE (operands[0]) != REG)
operands[1] = force_reg (<MODE>mode, operands[1]);
}
})
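;; As a purely illustrative sketch (the function is hypothetical; the
;; type-to-mode mapping follows arm_neon.h conventions), a C-level
;; copy of a two-vector tuple such as
;;
;;   void copy (int8x8x2_t *dst, int8x8x2_t *src) { *dst = *src; }
;;
;; begins as a V2x8QImode memory-to-memory move; the expander above
;; rewrites it so that only the store side still addresses memory.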
(define_expand "mov<mode>"
[(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
(match_operand:VSTRUCT 1 "general_operand"))]
"TARGET_SIMD"
{
if (can_create_pseudo_p ())
{
if (GET_CODE (operands[0]) != REG)
operands[1] = force_reg (<MODE>mode, operands[1]);
}
})
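;; The variant above applies the same legalization to the older opaque
;; struct modes covered by the VSTRUCT iterator (the large integer
;; modes OI, CI and XI), which are retained alongside the vector-tuple
;; modes.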
(define_expand "movv8di"
[(set (match_operand:V8DI 0 "nonimmediate_operand")
(match_operand:V8DI 1 "general_operand"))]
"TARGET_SIMD"
{
if (can_create_pseudo_p () && MEM_P (operands[0]))
operands[1] = force_reg (V8DImode, operands[1]);
})
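;; V8DImode is the 512-bit mode backing the LS64 data512_t type.  Only
;; a memory destination needs fixing up here: force_reg is a no-op on
;; a source that is already a register, so the effect is to legalize
;; memory-to-memory (and constant-to-memory) moves only.  A sketch of
;; the intended ACLE-level usage (hypothetical variable names):
;;
;;   data512_t v = __arm_ld64b (src);  /* 64-byte load (LD64B)  */
;;   __arm_st64b (dst, v);             /* 64-byte store (ST64B) */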
2021-08-09 16:26:48 +02:00
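
;; Patterns for the LD1x3 builtins behind the vld1_..._x3 and
;; vld1q_..._x3 arm_neon.h intrinsics: load three vectors' worth of
;; contiguous data with a single LD1 instruction.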
(define_expand "aarch64_ld1x3<vstruct_elt>"
[(match_operand:VSTRUCT_3QD 0 "register_operand")
(match_operand:DI 1 "register_operand")]
"TARGET_SIMD"
{
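  /* Operand 1 is the base address as a DI register; wrap it in a MEM
     of the vector-tuple mode and emit the ld1_x3 insn below.  */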
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
  emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[0], mem));
  DONE;
})
aarch64: Add machine modes for Neon vector-tuple types Until now, GCC has used large integer machine modes (OI, CI and XI) to model Neon vector-tuple types. This is suboptimal for many reasons, the most notable are: 1) Large integer modes are opaque and modifying one vector in the tuple requires a lot of inefficient set/get gymnastics. The result is a lot of superfluous move instructions. 2) Large integer modes do not map well to types that are tuples of 64-bit vectors - we need additional zero-padding which again results in superfluous move instructions. This patch adds new machine modes that better model the C-level Neon vector-tuple types. The approach is somewhat similar to that already used for SVE vector-tuple types. All of the AArch64 backend patterns and builtins that manipulate Neon vector tuples are updated to use the new machine modes. This has the effect of significantly reducing the amount of boiler-plate code in the arm_neon.h header. While this patch increases the quality of code generated in many instances, there is still room for significant improvement - which will be attempted in subsequent patches. gcc/ChangeLog: 2021-08-09 Jonathan Wright <jonathan.wright@arm.com> Richard Sandiford <richard.sandiford@arm.com> * config/aarch64/aarch64-builtins.c (v2x8qi_UP): Define. (v2x4hi_UP): Likewise. (v2x4hf_UP): Likewise. (v2x4bf_UP): Likewise. (v2x2si_UP): Likewise. (v2x2sf_UP): Likewise. (v2x1di_UP): Likewise. (v2x1df_UP): Likewise. (v2x16qi_UP): Likewise. (v2x8hi_UP): Likewise. (v2x8hf_UP): Likewise. (v2x8bf_UP): Likewise. (v2x4si_UP): Likewise. (v2x4sf_UP): Likewise. (v2x2di_UP): Likewise. (v2x2df_UP): Likewise. (v3x8qi_UP): Likewise. (v3x4hi_UP): Likewise. (v3x4hf_UP): Likewise. (v3x4bf_UP): Likewise. (v3x2si_UP): Likewise. (v3x2sf_UP): Likewise. (v3x1di_UP): Likewise. (v3x1df_UP): Likewise. (v3x16qi_UP): Likewise. (v3x8hi_UP): Likewise. (v3x8hf_UP): Likewise. (v3x8bf_UP): Likewise. (v3x4si_UP): Likewise. (v3x4sf_UP): Likewise. (v3x2di_UP): Likewise. (v3x2df_UP): Likewise. (v4x8qi_UP): Likewise. (v4x4hi_UP): Likewise. (v4x4hf_UP): Likewise. (v4x4bf_UP): Likewise. (v4x2si_UP): Likewise. (v4x2sf_UP): Likewise. (v4x1di_UP): Likewise. (v4x1df_UP): Likewise. (v4x16qi_UP): Likewise. (v4x8hi_UP): Likewise. (v4x8hf_UP): Likewise. (v4x8bf_UP): Likewise. (v4x4si_UP): Likewise. (v4x4sf_UP): Likewise. (v4x2di_UP): Likewise. (v4x2df_UP): Likewise. (TYPES_GETREGP): Delete. (TYPES_SETREGP): Likewise. (TYPES_LOADSTRUCT_U): Define. (TYPES_LOADSTRUCT_P): Likewise. (TYPES_LOADSTRUCT_LANE_U): Likewise. (TYPES_LOADSTRUCT_LANE_P): Likewise. (TYPES_STORE1P): Move for consistency. (TYPES_STORESTRUCT_U): Define. (TYPES_STORESTRUCT_P): Likewise. (TYPES_STORESTRUCT_LANE_U): Likewise. (TYPES_STORESTRUCT_LANE_P): Likewise. (aarch64_simd_tuple_types): Define. (aarch64_lookup_simd_builtin_type): Handle tuple type lookup. (aarch64_init_simd_builtin_functions): Update frontend lookup for builtin functions after handling arm_neon.h pragma. (register_tuple_type): Manually set modes of single-integer tuple types. Record tuple types. * config/aarch64/aarch64-modes.def (ADV_SIMD_D_REG_STRUCT_MODES): Define D-register tuple modes. (ADV_SIMD_Q_REG_STRUCT_MODES): Define Q-register tuple modes. (SVE_MODES): Give single-vector modes priority over vector- tuple modes. (VECTOR_MODES_WITH_PREFIX): Set partial-vector mode order to be after all single-vector modes. * config/aarch64/aarch64-simd-builtins.def: Update builtin generator macros to reflect modifications to the backend patterns. 
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2<vstruct_elt>): This. (aarch64_simd_ld2r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2r<vstruct_elt>): This. (aarch64_vec_load_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_load_lanes<mode>_lane<vstruct_elt>): This. (vec_load_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanes<mode><vstruct_elt>): This. (aarch64_simd_st2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st2<vstruct_elt>): This. (aarch64_vec_store_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_store_lanes<mode>_lane<vstruct_elt>): This. (vec_store_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanes<mode><vstruct_elt>): This. (aarch64_simd_ld3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3<vstruct_elt>): This. (aarch64_simd_ld3r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3r<vstruct_elt>): This. (aarch64_vec_load_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesci<mode>): This. (aarch64_simd_st3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st3<vstruct_elt>): This. (aarch64_vec_store_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesci<mode>): This. (aarch64_simd_ld4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4<vstruct_elt>): This. (aarch64_simd_ld4r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4r<vstruct_elt>): This. (aarch64_vec_load_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesxi<mode>): This. (aarch64_simd_st4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st4<vstruct_elt>): This. (aarch64_vec_store_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesxi<mode>): This. (mov<mode>): Define for Neon vector-tuple modes. (aarch64_ld1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x3<vstruct_elt>): This. (aarch64_ld1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x3_<vstruct_elt>): This. (aarch64_ld1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x4<vstruct_elt>): This. (aarch64_ld1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x4_<vstruct_elt>): This. (aarch64_st1x2<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x2<vstruct_elt>): This. (aarch64_st1_x2_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x2_<vstruct_elt>): This. (aarch64_st1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x3<vstruct_elt>): This. (aarch64_st1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x3_<vstruct_elt>): This. (aarch64_st1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x4<vstruct_elt>): This. (aarch64_st1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x4_<vstruct_elt>): This. (*aarch64_mov<mode>): Define for vector-tuple modes. (*aarch64_be_mov<mode>): Likewise. (aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs>r<vstruct_elt>): This. (aarch64_ld2<mode>_dreg): Use vector-tuple mode iterator and rename to... 
(aarch64_ld2<vstruct_elt>_dreg): This. (aarch64_ld3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld3<vstruct_elt>_dreg): This. (aarch64_ld4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld4<vstruct_elt>_dreg): This. (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs><vstruct_elt>): Use vector-tuple mode iterator and rename to... (aarch64_ld<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode (aarch64_ld1x2<VQ:mode>): Delete. (aarch64_ld1x2<VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x2<vstruct_elt>): This. (aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_ld<nregs>_lane<vstruct_elt>): This. (aarch64_get_dreg<VSTRUCT:mode><VDC:mode>): Delete. (aarch64_get_qreg<VSTRUCT:mode><VQ:mode>): Likewise. (aarch64_st2<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st2<vstruct_elt>_dreg): This. (aarch64_st3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st3<vstruct_elt>_dreg): This. (aarch64_st4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st4<vstruct_elt>_dreg): This. (aarch64_st<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st<nregs><vstruct_elt>): This. (aarch64_st<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode iterator and rename to aarch64_st<nregs><vstruct_elt>. (aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_st<nregs>_lane<vstruct_elt>): This. (aarch64_set_qreg<VSTRUCT:mode><VQ:mode>): Delete. (aarch64_simd_ld1<mode>_x2): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld1<vstruct_elt>_x2): This. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Refactor to include new vector-tuple modes. (aarch64_classify_vector_mode): Add cases for new vector- tuple modes. (aarch64_advsimd_partial_struct_mode_p): Define. (aarch64_advsimd_full_struct_mode_p): Likewise. (aarch64_advsimd_vector_array_mode): Likewise. (aarch64_sve_data_mode): Change location in file. (aarch64_array_mode): Handle case of Neon vector-tuple modes. (aarch64_hard_regno_nregs): Handle case of partial Neon vector structures. (aarch64_classify_address): Refactor to include handling of Neon vector-tuple modes. (aarch64_print_operand): Print "d" for "%R" for a partial Neon vector structure. (aarch64_expand_vec_perm_1): Use new vector-tuple mode. (aarch64_modes_tieable_p): Prevent tieing Neon partial struct modes with scalar machines modes larger than 8 bytes. (aarch64_can_change_mode_class): Don't allow changes between partial and full Neon vector-structure modes. * config/aarch64/arm_neon.h (vst2_lane_f16): Use updated builtin and remove boiler-plate code for opaque mode. (vst2_lane_f32): Likewise. (vst2_lane_f64): Likewise. (vst2_lane_p8): Likewise. (vst2_lane_p16): Likewise. (vst2_lane_p64): Likewise. (vst2_lane_s8): Likewise. (vst2_lane_s16): Likewise. (vst2_lane_s32): Likewise. (vst2_lane_s64): Likewise. (vst2_lane_u8): Likewise. (vst2_lane_u16): Likewise. (vst2_lane_u32): Likewise. (vst2_lane_u64): Likewise. (vst2q_lane_f16): Likewise. (vst2q_lane_f32): Likewise. (vst2q_lane_f64): Likewise. (vst2q_lane_p8): Likewise. (vst2q_lane_p16): Likewise. (vst2q_lane_p64): Likewise. (vst2q_lane_s8): Likewise. (vst2q_lane_s16): Likewise. (vst2q_lane_s32): Likewise. (vst2q_lane_s64): Likewise. (vst2q_lane_u8): Likewise. (vst2q_lane_u16): Likewise. (vst2q_lane_u32): Likewise. 
(vst2q_lane_u64): Likewise. (vst3_lane_f16): Likewise. (vst3_lane_f32): Likewise. (vst3_lane_f64): Likewise. (vst3_lane_p8): Likewise. (vst3_lane_p16): Likewise. (vst3_lane_p64): Likewise. (vst3_lane_s8): Likewise. (vst3_lane_s16): Likewise. (vst3_lane_s32): Likewise. (vst3_lane_s64): Likewise. (vst3_lane_u8): Likewise. (vst3_lane_u16): Likewise. (vst3_lane_u32): Likewise. (vst3_lane_u64): Likewise. (vst3q_lane_f16): Likewise. (vst3q_lane_f32): Likewise. (vst3q_lane_f64): Likewise. (vst3q_lane_p8): Likewise. (vst3q_lane_p16): Likewise. (vst3q_lane_p64): Likewise. (vst3q_lane_s8): Likewise. (vst3q_lane_s16): Likewise. (vst3q_lane_s32): Likewise. (vst3q_lane_s64): Likewise. (vst3q_lane_u8): Likewise. (vst3q_lane_u16): Likewise. (vst3q_lane_u32): Likewise. (vst3q_lane_u64): Likewise. (vst4_lane_f16): Likewise. (vst4_lane_f32): Likewise. (vst4_lane_f64): Likewise. (vst4_lane_p8): Likewise. (vst4_lane_p16): Likewise. (vst4_lane_p64): Likewise. (vst4_lane_s8): Likewise. (vst4_lane_s16): Likewise. (vst4_lane_s32): Likewise. (vst4_lane_s64): Likewise. (vst4_lane_u8): Likewise. (vst4_lane_u16): Likewise. (vst4_lane_u32): Likewise. (vst4_lane_u64): Likewise. (vst4q_lane_f16): Likewise. (vst4q_lane_f32): Likewise. (vst4q_lane_f64): Likewise. (vst4q_lane_p8): Likewise. (vst4q_lane_p16): Likewise. (vst4q_lane_p64): Likewise. (vst4q_lane_s8): Likewise. (vst4q_lane_s16): Likewise. (vst4q_lane_s32): Likewise. (vst4q_lane_s64): Likewise. (vst4q_lane_u8): Likewise. (vst4q_lane_u16): Likewise. (vst4q_lane_u32): Likewise. (vst4q_lane_u64): Likewise. (vtbl3_s8): Likewise. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vld1_u8_x3): Likewise. (vld1_s8_x3): Likewise. (vld1_u16_x3): Likewise. (vld1_s16_x3): Likewise. (vld1_u32_x3): Likewise. (vld1_s32_x3): Likewise. (vld1_u64_x3): Likewise. (vld1_s64_x3): Likewise. (vld1_f16_x3): Likewise. (vld1_f32_x3): Likewise. (vld1_f64_x3): Likewise. (vld1_p8_x3): Likewise. (vld1_p16_x3): Likewise. (vld1_p64_x3): Likewise. (vld1q_u8_x3): Likewise. (vld1q_s8_x3): Likewise. (vld1q_u16_x3): Likewise. (vld1q_s16_x3): Likewise. (vld1q_u32_x3): Likewise. (vld1q_s32_x3): Likewise. (vld1q_u64_x3): Likewise. (vld1q_s64_x3): Likewise. (vld1q_f16_x3): Likewise. (vld1q_f32_x3): Likewise. (vld1q_f64_x3): Likewise. (vld1q_p8_x3): Likewise. (vld1q_p16_x3): Likewise. (vld1q_p64_x3): Likewise. (vld1_u8_x2): Likewise. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. (vld1_s8_x4): Likewise. (vld1q_s8_x4): Likewise. (vld1_s16_x4): Likewise. (vld1q_s16_x4): Likewise. (vld1_s32_x4): Likewise. (vld1q_s32_x4): Likewise. (vld1_u8_x4): Likewise. (vld1q_u8_x4): Likewise. (vld1_u16_x4): Likewise. (vld1q_u16_x4): Likewise. (vld1_u32_x4): Likewise. (vld1q_u32_x4): Likewise. (vld1_f16_x4): Likewise. (vld1q_f16_x4): Likewise. (vld1_f32_x4): Likewise. (vld1q_f32_x4): Likewise. (vld1_p8_x4): Likewise. 
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
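
;; Load multiple single-element structures to three consecutive vector
;; registers with a single LD1.  Operand 1 is the structured memory
;; source; %S0 and %U0 name the first and last registers of the
;; three-register tuple.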
(define_insn "aarch64_ld1_x3_<vstruct_elt>"
[(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
(unspec:VSTRUCT_3QD
[(match_operand:VSTRUCT_3QD 1 "aarch64_simd_struct_operand" "Utv")]
UNSPEC_LD1))]
"TARGET_SIMD"
"ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
[(set_attr "type" "neon_load1_3reg<q>")]
)
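
;; A C-level sketch (not from this file) of code expected to exercise
;; the pattern above through the arm_neon.h intrinsics -- "p" is a
;; hypothetical pointer to at least twelve float32_t values:
;;   float32x4x3_t t = vld1q_f32_x3 (p);
;; which should assemble to a single three-register load such as:
;;   ld1 {v0.4s - v2.4s}, [x0]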
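
;; Expander for the ld1x4 builtins: wrap the address register (operand
;; 1) in a MEM of the four-vector tuple mode, then emit the matching
;; aarch64_ld1_x4_<vstruct_elt> insn.  A C-level sketch (assuming a
;; hypothetical "p" pointing to at least sixteen float32_t values):
;;   float32x4x4_t t = vld1q_f32_x4 (p);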
(define_expand "aarch64_ld1x4<vstruct_elt>"
[(match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
(match_operand:DI 1 "register_operand" "r")]
"TARGET_SIMD"
{
aarch64: Add machine modes for Neon vector-tuple types Until now, GCC has used large integer machine modes (OI, CI and XI) to model Neon vector-tuple types. This is suboptimal for many reasons, the most notable are: 1) Large integer modes are opaque and modifying one vector in the tuple requires a lot of inefficient set/get gymnastics. The result is a lot of superfluous move instructions. 2) Large integer modes do not map well to types that are tuples of 64-bit vectors - we need additional zero-padding which again results in superfluous move instructions. This patch adds new machine modes that better model the C-level Neon vector-tuple types. The approach is somewhat similar to that already used for SVE vector-tuple types. All of the AArch64 backend patterns and builtins that manipulate Neon vector tuples are updated to use the new machine modes. This has the effect of significantly reducing the amount of boiler-plate code in the arm_neon.h header. While this patch increases the quality of code generated in many instances, there is still room for significant improvement - which will be attempted in subsequent patches. gcc/ChangeLog: 2021-08-09 Jonathan Wright <jonathan.wright@arm.com> Richard Sandiford <richard.sandiford@arm.com> * config/aarch64/aarch64-builtins.c (v2x8qi_UP): Define. (v2x4hi_UP): Likewise. (v2x4hf_UP): Likewise. (v2x4bf_UP): Likewise. (v2x2si_UP): Likewise. (v2x2sf_UP): Likewise. (v2x1di_UP): Likewise. (v2x1df_UP): Likewise. (v2x16qi_UP): Likewise. (v2x8hi_UP): Likewise. (v2x8hf_UP): Likewise. (v2x8bf_UP): Likewise. (v2x4si_UP): Likewise. (v2x4sf_UP): Likewise. (v2x2di_UP): Likewise. (v2x2df_UP): Likewise. (v3x8qi_UP): Likewise. (v3x4hi_UP): Likewise. (v3x4hf_UP): Likewise. (v3x4bf_UP): Likewise. (v3x2si_UP): Likewise. (v3x2sf_UP): Likewise. (v3x1di_UP): Likewise. (v3x1df_UP): Likewise. (v3x16qi_UP): Likewise. (v3x8hi_UP): Likewise. (v3x8hf_UP): Likewise. (v3x8bf_UP): Likewise. (v3x4si_UP): Likewise. (v3x4sf_UP): Likewise. (v3x2di_UP): Likewise. (v3x2df_UP): Likewise. (v4x8qi_UP): Likewise. (v4x4hi_UP): Likewise. (v4x4hf_UP): Likewise. (v4x4bf_UP): Likewise. (v4x2si_UP): Likewise. (v4x2sf_UP): Likewise. (v4x1di_UP): Likewise. (v4x1df_UP): Likewise. (v4x16qi_UP): Likewise. (v4x8hi_UP): Likewise. (v4x8hf_UP): Likewise. (v4x8bf_UP): Likewise. (v4x4si_UP): Likewise. (v4x4sf_UP): Likewise. (v4x2di_UP): Likewise. (v4x2df_UP): Likewise. (TYPES_GETREGP): Delete. (TYPES_SETREGP): Likewise. (TYPES_LOADSTRUCT_U): Define. (TYPES_LOADSTRUCT_P): Likewise. (TYPES_LOADSTRUCT_LANE_U): Likewise. (TYPES_LOADSTRUCT_LANE_P): Likewise. (TYPES_STORE1P): Move for consistency. (TYPES_STORESTRUCT_U): Define. (TYPES_STORESTRUCT_P): Likewise. (TYPES_STORESTRUCT_LANE_U): Likewise. (TYPES_STORESTRUCT_LANE_P): Likewise. (aarch64_simd_tuple_types): Define. (aarch64_lookup_simd_builtin_type): Handle tuple type lookup. (aarch64_init_simd_builtin_functions): Update frontend lookup for builtin functions after handling arm_neon.h pragma. (register_tuple_type): Manually set modes of single-integer tuple types. Record tuple types. * config/aarch64/aarch64-modes.def (ADV_SIMD_D_REG_STRUCT_MODES): Define D-register tuple modes. (ADV_SIMD_Q_REG_STRUCT_MODES): Define Q-register tuple modes. (SVE_MODES): Give single-vector modes priority over vector- tuple modes. (VECTOR_MODES_WITH_PREFIX): Set partial-vector mode order to be after all single-vector modes. * config/aarch64/aarch64-simd-builtins.def: Update builtin generator macros to reflect modifications to the backend patterns. 
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2<vstruct_elt>): This. (aarch64_simd_ld2r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2r<vstruct_elt>): This. (aarch64_vec_load_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_load_lanes<mode>_lane<vstruct_elt>): This. (vec_load_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanes<mode><vstruct_elt>): This. (aarch64_simd_st2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st2<vstruct_elt>): This. (aarch64_vec_store_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_store_lanes<mode>_lane<vstruct_elt>): This. (vec_store_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanes<mode><vstruct_elt>): This. (aarch64_simd_ld3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3<vstruct_elt>): This. (aarch64_simd_ld3r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3r<vstruct_elt>): This. (aarch64_vec_load_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesci<mode>): This. (aarch64_simd_st3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st3<vstruct_elt>): This. (aarch64_vec_store_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesci<mode>): This. (aarch64_simd_ld4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4<vstruct_elt>): This. (aarch64_simd_ld4r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4r<vstruct_elt>): This. (aarch64_vec_load_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesxi<mode>): This. (aarch64_simd_st4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st4<vstruct_elt>): This. (aarch64_vec_store_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesxi<mode>): This. (mov<mode>): Define for Neon vector-tuple modes. (aarch64_ld1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x3<vstruct_elt>): This. (aarch64_ld1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x3_<vstruct_elt>): This. (aarch64_ld1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x4<vstruct_elt>): This. (aarch64_ld1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x4_<vstruct_elt>): This. (aarch64_st1x2<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x2<vstruct_elt>): This. (aarch64_st1_x2_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x2_<vstruct_elt>): This. (aarch64_st1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x3<vstruct_elt>): This. (aarch64_st1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x3_<vstruct_elt>): This. (aarch64_st1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x4<vstruct_elt>): This. (aarch64_st1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x4_<vstruct_elt>): This. (*aarch64_mov<mode>): Define for vector-tuple modes. (*aarch64_be_mov<mode>): Likewise. (aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs>r<vstruct_elt>): This. (aarch64_ld2<mode>_dreg): Use vector-tuple mode iterator and rename to... 
(aarch64_ld2<vstruct_elt>_dreg): This. (aarch64_ld3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld3<vstruct_elt>_dreg): This. (aarch64_ld4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld4<vstruct_elt>_dreg): This. (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs><vstruct_elt>): Use vector-tuple mode iterator and rename to... (aarch64_ld<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode (aarch64_ld1x2<VQ:mode>): Delete. (aarch64_ld1x2<VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x2<vstruct_elt>): This. (aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_ld<nregs>_lane<vstruct_elt>): This. (aarch64_get_dreg<VSTRUCT:mode><VDC:mode>): Delete. (aarch64_get_qreg<VSTRUCT:mode><VQ:mode>): Likewise. (aarch64_st2<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st2<vstruct_elt>_dreg): This. (aarch64_st3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st3<vstruct_elt>_dreg): This. (aarch64_st4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st4<vstruct_elt>_dreg): This. (aarch64_st<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st<nregs><vstruct_elt>): This. (aarch64_st<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode iterator and rename to aarch64_st<nregs><vstruct_elt>. (aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_st<nregs>_lane<vstruct_elt>): This. (aarch64_set_qreg<VSTRUCT:mode><VQ:mode>): Delete. (aarch64_simd_ld1<mode>_x2): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld1<vstruct_elt>_x2): This. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Refactor to include new vector-tuple modes. (aarch64_classify_vector_mode): Add cases for new vector- tuple modes. (aarch64_advsimd_partial_struct_mode_p): Define. (aarch64_advsimd_full_struct_mode_p): Likewise. (aarch64_advsimd_vector_array_mode): Likewise. (aarch64_sve_data_mode): Change location in file. (aarch64_array_mode): Handle case of Neon vector-tuple modes. (aarch64_hard_regno_nregs): Handle case of partial Neon vector structures. (aarch64_classify_address): Refactor to include handling of Neon vector-tuple modes. (aarch64_print_operand): Print "d" for "%R" for a partial Neon vector structure. (aarch64_expand_vec_perm_1): Use new vector-tuple mode. (aarch64_modes_tieable_p): Prevent tieing Neon partial struct modes with scalar machines modes larger than 8 bytes. (aarch64_can_change_mode_class): Don't allow changes between partial and full Neon vector-structure modes. * config/aarch64/arm_neon.h (vst2_lane_f16): Use updated builtin and remove boiler-plate code for opaque mode. (vst2_lane_f32): Likewise. (vst2_lane_f64): Likewise. (vst2_lane_p8): Likewise. (vst2_lane_p16): Likewise. (vst2_lane_p64): Likewise. (vst2_lane_s8): Likewise. (vst2_lane_s16): Likewise. (vst2_lane_s32): Likewise. (vst2_lane_s64): Likewise. (vst2_lane_u8): Likewise. (vst2_lane_u16): Likewise. (vst2_lane_u32): Likewise. (vst2_lane_u64): Likewise. (vst2q_lane_f16): Likewise. (vst2q_lane_f32): Likewise. (vst2q_lane_f64): Likewise. (vst2q_lane_p8): Likewise. (vst2q_lane_p16): Likewise. (vst2q_lane_p64): Likewise. (vst2q_lane_s8): Likewise. (vst2q_lane_s16): Likewise. (vst2q_lane_s32): Likewise. (vst2q_lane_s64): Likewise. (vst2q_lane_u8): Likewise. (vst2q_lane_u16): Likewise. (vst2q_lane_u32): Likewise. 
(vst2q_lane_u64): Likewise. (vst3_lane_f16): Likewise. (vst3_lane_f32): Likewise. (vst3_lane_f64): Likewise. (vst3_lane_p8): Likewise. (vst3_lane_p16): Likewise. (vst3_lane_p64): Likewise. (vst3_lane_s8): Likewise. (vst3_lane_s16): Likewise. (vst3_lane_s32): Likewise. (vst3_lane_s64): Likewise. (vst3_lane_u8): Likewise. (vst3_lane_u16): Likewise. (vst3_lane_u32): Likewise. (vst3_lane_u64): Likewise. (vst3q_lane_f16): Likewise. (vst3q_lane_f32): Likewise. (vst3q_lane_f64): Likewise. (vst3q_lane_p8): Likewise. (vst3q_lane_p16): Likewise. (vst3q_lane_p64): Likewise. (vst3q_lane_s8): Likewise. (vst3q_lane_s16): Likewise. (vst3q_lane_s32): Likewise. (vst3q_lane_s64): Likewise. (vst3q_lane_u8): Likewise. (vst3q_lane_u16): Likewise. (vst3q_lane_u32): Likewise. (vst3q_lane_u64): Likewise. (vst4_lane_f16): Likewise. (vst4_lane_f32): Likewise. (vst4_lane_f64): Likewise. (vst4_lane_p8): Likewise. (vst4_lane_p16): Likewise. (vst4_lane_p64): Likewise. (vst4_lane_s8): Likewise. (vst4_lane_s16): Likewise. (vst4_lane_s32): Likewise. (vst4_lane_s64): Likewise. (vst4_lane_u8): Likewise. (vst4_lane_u16): Likewise. (vst4_lane_u32): Likewise. (vst4_lane_u64): Likewise. (vst4q_lane_f16): Likewise. (vst4q_lane_f32): Likewise. (vst4q_lane_f64): Likewise. (vst4q_lane_p8): Likewise. (vst4q_lane_p16): Likewise. (vst4q_lane_p64): Likewise. (vst4q_lane_s8): Likewise. (vst4q_lane_s16): Likewise. (vst4q_lane_s32): Likewise. (vst4q_lane_s64): Likewise. (vst4q_lane_u8): Likewise. (vst4q_lane_u16): Likewise. (vst4q_lane_u32): Likewise. (vst4q_lane_u64): Likewise. (vtbl3_s8): Likewise. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vld1_u8_x3): Likewise. (vld1_s8_x3): Likewise. (vld1_u16_x3): Likewise. (vld1_s16_x3): Likewise. (vld1_u32_x3): Likewise. (vld1_s32_x3): Likewise. (vld1_u64_x3): Likewise. (vld1_s64_x3): Likewise. (vld1_f16_x3): Likewise. (vld1_f32_x3): Likewise. (vld1_f64_x3): Likewise. (vld1_p8_x3): Likewise. (vld1_p16_x3): Likewise. (vld1_p64_x3): Likewise. (vld1q_u8_x3): Likewise. (vld1q_s8_x3): Likewise. (vld1q_u16_x3): Likewise. (vld1q_s16_x3): Likewise. (vld1q_u32_x3): Likewise. (vld1q_s32_x3): Likewise. (vld1q_u64_x3): Likewise. (vld1q_s64_x3): Likewise. (vld1q_f16_x3): Likewise. (vld1q_f32_x3): Likewise. (vld1q_f64_x3): Likewise. (vld1q_p8_x3): Likewise. (vld1q_p16_x3): Likewise. (vld1q_p64_x3): Likewise. (vld1_u8_x2): Likewise. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. (vld1_s8_x4): Likewise. (vld1q_s8_x4): Likewise. (vld1_s16_x4): Likewise. (vld1q_s16_x4): Likewise. (vld1_s32_x4): Likewise. (vld1q_s32_x4): Likewise. (vld1_u8_x4): Likewise. (vld1q_u8_x4): Likewise. (vld1_u16_x4): Likewise. (vld1q_u16_x4): Likewise. (vld1_u32_x4): Likewise. (vld1q_u32_x4): Likewise. (vld1_f16_x4): Likewise. (vld1q_f16_x4): Likewise. (vld1_f32_x4): Likewise. (vld1q_f32_x4): Likewise. (vld1_p8_x4): Likewise. 
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
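  /* Wrap the pointer operand in a MEM of the full tuple mode; the
     ld1_x4 insn below performs the actual four-register load.  */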
rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[0], mem));
DONE;
})
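
;; A minimal usage sketch (illustration only, not part of this file):
;; the arm_neon.h vld1*_x4 intrinsics funnel through the expander above,
;; so a call such as
;;
;;   #include <arm_neon.h>
;;   int32x4x4_t load4 (const int32_t *p)
;;   {
;;     return vld1q_s32_x4 (p);
;;   }
;;
;; should compile at -O2 to a single four-register LD1, e.g.
;; "ld1 {v0.4s - v3.4s}, [x0]".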
(define_insn "aarch64_ld1_x4_<vstruct_elt>"
[(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
(unspec:VSTRUCT_4QD
[(match_operand:VSTRUCT_4QD 1 "aarch64_simd_struct_operand" "Utv")]
UNSPEC_LD1))]
"TARGET_SIMD"
"ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
[(set_attr "type" "neon_load1_4reg<q>")]
)
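
;; Note on the output template above: for multi-register operands the
;; AArch64 backend's operand modifiers %S..%V print the first through
;; fourth registers of the tuple, so "%S0 - %V0" names a range of four
;; consecutive vector registers.  Assuming the tuple is allocated to
;; v8-v11 with V4SI element mode, the emitted instruction would look
;; like:
;;
;;   ld1  {v8.4s - v11.4s}, [x0]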
2021-08-09 16:26:48 +02:00
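;; Store a pair of vectors to consecutive memory locations (the vst1_x2
;; and vst1q_x2 intrinsics).  Operand 0 is the base address as a DImode
;; register; wrap it in a MEM of the vector-tuple mode and hand off to
;; the aarch64_st1_x2_<vstruct_elt> instruction pattern.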
(define_expand "aarch64_st1x2<vstruct_elt>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VSTRUCT_2QD 1 "register_operand")]
  "TARGET_SIMD"
{
  /* Operand 0 is the address register; wrap it in a MEM of the
     vector-tuple mode and defer to the single-insn store pattern.  */
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
  emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[1]));
  DONE;
})
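
;; A minimal usage sketch (assuming the usual arm_neon.h mapping of the
;; vst1q_*_x2 intrinsics onto the aarch64_st1x2<vstruct_elt> expander):
;;
;;   #include <arm_neon.h>
;;   void
;;   store_pair (int32_t *p, int32x4x2_t v)
;;   {
;;     vst1q_s32_x2 (p, v);  /* Expected to assemble to:
;;                              st1 {v0.4s - v1.4s}, [x0]  */
;;   }
;;
;; Because the vector-tuple mode is allocated to consecutive SIMD
;; registers, a single ST1 covers both vectors of the pair.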
(define_insn "aarch64_st1_x2_<vstruct_elt>"
[(set (match_operand:VSTRUCT_2QD 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:VSTRUCT_2QD
[(match_operand:VSTRUCT_2QD 1 "register_operand" "w")]
UNSPEC_ST1))]
"TARGET_SIMD"
"st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
[(set_attr "type" "neon_store1_2reg<q>")]
)
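
;; Note: %S1 and %T1 print the first and second SIMD register of the
;; tuple in operand 1 (the aarch64 'S'..'V' operand modifiers select
;; register X + 0..3), which yields the "{vN.<T> - vN+1.<T>}" register
;; list in the output template above.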
aarch64: Add machine modes for Neon vector-tuple types Until now, GCC has used large integer machine modes (OI, CI and XI) to model Neon vector-tuple types. This is suboptimal for many reasons, the most notable are: 1) Large integer modes are opaque and modifying one vector in the tuple requires a lot of inefficient set/get gymnastics. The result is a lot of superfluous move instructions. 2) Large integer modes do not map well to types that are tuples of 64-bit vectors - we need additional zero-padding which again results in superfluous move instructions. This patch adds new machine modes that better model the C-level Neon vector-tuple types. The approach is somewhat similar to that already used for SVE vector-tuple types. All of the AArch64 backend patterns and builtins that manipulate Neon vector tuples are updated to use the new machine modes. This has the effect of significantly reducing the amount of boiler-plate code in the arm_neon.h header. While this patch increases the quality of code generated in many instances, there is still room for significant improvement - which will be attempted in subsequent patches. gcc/ChangeLog: 2021-08-09 Jonathan Wright <jonathan.wright@arm.com> Richard Sandiford <richard.sandiford@arm.com> * config/aarch64/aarch64-builtins.c (v2x8qi_UP): Define. (v2x4hi_UP): Likewise. (v2x4hf_UP): Likewise. (v2x4bf_UP): Likewise. (v2x2si_UP): Likewise. (v2x2sf_UP): Likewise. (v2x1di_UP): Likewise. (v2x1df_UP): Likewise. (v2x16qi_UP): Likewise. (v2x8hi_UP): Likewise. (v2x8hf_UP): Likewise. (v2x8bf_UP): Likewise. (v2x4si_UP): Likewise. (v2x4sf_UP): Likewise. (v2x2di_UP): Likewise. (v2x2df_UP): Likewise. (v3x8qi_UP): Likewise. (v3x4hi_UP): Likewise. (v3x4hf_UP): Likewise. (v3x4bf_UP): Likewise. (v3x2si_UP): Likewise. (v3x2sf_UP): Likewise. (v3x1di_UP): Likewise. (v3x1df_UP): Likewise. (v3x16qi_UP): Likewise. (v3x8hi_UP): Likewise. (v3x8hf_UP): Likewise. (v3x8bf_UP): Likewise. (v3x4si_UP): Likewise. (v3x4sf_UP): Likewise. (v3x2di_UP): Likewise. (v3x2df_UP): Likewise. (v4x8qi_UP): Likewise. (v4x4hi_UP): Likewise. (v4x4hf_UP): Likewise. (v4x4bf_UP): Likewise. (v4x2si_UP): Likewise. (v4x2sf_UP): Likewise. (v4x1di_UP): Likewise. (v4x1df_UP): Likewise. (v4x16qi_UP): Likewise. (v4x8hi_UP): Likewise. (v4x8hf_UP): Likewise. (v4x8bf_UP): Likewise. (v4x4si_UP): Likewise. (v4x4sf_UP): Likewise. (v4x2di_UP): Likewise. (v4x2df_UP): Likewise. (TYPES_GETREGP): Delete. (TYPES_SETREGP): Likewise. (TYPES_LOADSTRUCT_U): Define. (TYPES_LOADSTRUCT_P): Likewise. (TYPES_LOADSTRUCT_LANE_U): Likewise. (TYPES_LOADSTRUCT_LANE_P): Likewise. (TYPES_STORE1P): Move for consistency. (TYPES_STORESTRUCT_U): Define. (TYPES_STORESTRUCT_P): Likewise. (TYPES_STORESTRUCT_LANE_U): Likewise. (TYPES_STORESTRUCT_LANE_P): Likewise. (aarch64_simd_tuple_types): Define. (aarch64_lookup_simd_builtin_type): Handle tuple type lookup. (aarch64_init_simd_builtin_functions): Update frontend lookup for builtin functions after handling arm_neon.h pragma. (register_tuple_type): Manually set modes of single-integer tuple types. Record tuple types. * config/aarch64/aarch64-modes.def (ADV_SIMD_D_REG_STRUCT_MODES): Define D-register tuple modes. (ADV_SIMD_Q_REG_STRUCT_MODES): Define Q-register tuple modes. (SVE_MODES): Give single-vector modes priority over vector- tuple modes. (VECTOR_MODES_WITH_PREFIX): Set partial-vector mode order to be after all single-vector modes. * config/aarch64/aarch64-simd-builtins.def: Update builtin generator macros to reflect modifications to the backend patterns. 
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2<vstruct_elt>): This. (aarch64_simd_ld2r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2r<vstruct_elt>): This. (aarch64_vec_load_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_load_lanes<mode>_lane<vstruct_elt>): This. (vec_load_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanes<mode><vstruct_elt>): This. (aarch64_simd_st2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st2<vstruct_elt>): This. (aarch64_vec_store_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_store_lanes<mode>_lane<vstruct_elt>): This. (vec_store_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanes<mode><vstruct_elt>): This. (aarch64_simd_ld3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3<vstruct_elt>): This. (aarch64_simd_ld3r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3r<vstruct_elt>): This. (aarch64_vec_load_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesci<mode>): This. (aarch64_simd_st3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st3<vstruct_elt>): This. (aarch64_vec_store_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesci<mode>): This. (aarch64_simd_ld4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4<vstruct_elt>): This. (aarch64_simd_ld4r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4r<vstruct_elt>): This. (aarch64_vec_load_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesxi<mode>): This. (aarch64_simd_st4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st4<vstruct_elt>): This. (aarch64_vec_store_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesxi<mode>): This. (mov<mode>): Define for Neon vector-tuple modes. (aarch64_ld1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x3<vstruct_elt>): This. (aarch64_ld1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x3_<vstruct_elt>): This. (aarch64_ld1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x4<vstruct_elt>): This. (aarch64_ld1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x4_<vstruct_elt>): This. (aarch64_st1x2<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x2<vstruct_elt>): This. (aarch64_st1_x2_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x2_<vstruct_elt>): This. (aarch64_st1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x3<vstruct_elt>): This. (aarch64_st1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x3_<vstruct_elt>): This. (aarch64_st1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x4<vstruct_elt>): This. (aarch64_st1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x4_<vstruct_elt>): This. (*aarch64_mov<mode>): Define for vector-tuple modes. (*aarch64_be_mov<mode>): Likewise. (aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs>r<vstruct_elt>): This. (aarch64_ld2<mode>_dreg): Use vector-tuple mode iterator and rename to... 
(aarch64_ld2<vstruct_elt>_dreg): This. (aarch64_ld3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld3<vstruct_elt>_dreg): This. (aarch64_ld4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld4<vstruct_elt>_dreg): This. (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs><vstruct_elt>): Use vector-tuple mode iterator and rename to... (aarch64_ld<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode (aarch64_ld1x2<VQ:mode>): Delete. (aarch64_ld1x2<VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x2<vstruct_elt>): This. (aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_ld<nregs>_lane<vstruct_elt>): This. (aarch64_get_dreg<VSTRUCT:mode><VDC:mode>): Delete. (aarch64_get_qreg<VSTRUCT:mode><VQ:mode>): Likewise. (aarch64_st2<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st2<vstruct_elt>_dreg): This. (aarch64_st3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st3<vstruct_elt>_dreg): This. (aarch64_st4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st4<vstruct_elt>_dreg): This. (aarch64_st<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st<nregs><vstruct_elt>): This. (aarch64_st<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode iterator and rename to aarch64_st<nregs><vstruct_elt>. (aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_st<nregs>_lane<vstruct_elt>): This. (aarch64_set_qreg<VSTRUCT:mode><VQ:mode>): Delete. (aarch64_simd_ld1<mode>_x2): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld1<vstruct_elt>_x2): This. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Refactor to include new vector-tuple modes. (aarch64_classify_vector_mode): Add cases for new vector- tuple modes. (aarch64_advsimd_partial_struct_mode_p): Define. (aarch64_advsimd_full_struct_mode_p): Likewise. (aarch64_advsimd_vector_array_mode): Likewise. (aarch64_sve_data_mode): Change location in file. (aarch64_array_mode): Handle case of Neon vector-tuple modes. (aarch64_hard_regno_nregs): Handle case of partial Neon vector structures. (aarch64_classify_address): Refactor to include handling of Neon vector-tuple modes. (aarch64_print_operand): Print "d" for "%R" for a partial Neon vector structure. (aarch64_expand_vec_perm_1): Use new vector-tuple mode. (aarch64_modes_tieable_p): Prevent tieing Neon partial struct modes with scalar machines modes larger than 8 bytes. (aarch64_can_change_mode_class): Don't allow changes between partial and full Neon vector-structure modes. * config/aarch64/arm_neon.h (vst2_lane_f16): Use updated builtin and remove boiler-plate code for opaque mode. (vst2_lane_f32): Likewise. (vst2_lane_f64): Likewise. (vst2_lane_p8): Likewise. (vst2_lane_p16): Likewise. (vst2_lane_p64): Likewise. (vst2_lane_s8): Likewise. (vst2_lane_s16): Likewise. (vst2_lane_s32): Likewise. (vst2_lane_s64): Likewise. (vst2_lane_u8): Likewise. (vst2_lane_u16): Likewise. (vst2_lane_u32): Likewise. (vst2_lane_u64): Likewise. (vst2q_lane_f16): Likewise. (vst2q_lane_f32): Likewise. (vst2q_lane_f64): Likewise. (vst2q_lane_p8): Likewise. (vst2q_lane_p16): Likewise. (vst2q_lane_p64): Likewise. (vst2q_lane_s8): Likewise. (vst2q_lane_s16): Likewise. (vst2q_lane_s32): Likewise. (vst2q_lane_s64): Likewise. (vst2q_lane_u8): Likewise. (vst2q_lane_u16): Likewise. (vst2q_lane_u32): Likewise. 
(vst2q_lane_u64): Likewise. (vst3_lane_f16): Likewise. (vst3_lane_f32): Likewise. (vst3_lane_f64): Likewise. (vst3_lane_p8): Likewise. (vst3_lane_p16): Likewise. (vst3_lane_p64): Likewise. (vst3_lane_s8): Likewise. (vst3_lane_s16): Likewise. (vst3_lane_s32): Likewise. (vst3_lane_s64): Likewise. (vst3_lane_u8): Likewise. (vst3_lane_u16): Likewise. (vst3_lane_u32): Likewise. (vst3_lane_u64): Likewise. (vst3q_lane_f16): Likewise. (vst3q_lane_f32): Likewise. (vst3q_lane_f64): Likewise. (vst3q_lane_p8): Likewise. (vst3q_lane_p16): Likewise. (vst3q_lane_p64): Likewise. (vst3q_lane_s8): Likewise. (vst3q_lane_s16): Likewise. (vst3q_lane_s32): Likewise. (vst3q_lane_s64): Likewise. (vst3q_lane_u8): Likewise. (vst3q_lane_u16): Likewise. (vst3q_lane_u32): Likewise. (vst3q_lane_u64): Likewise. (vst4_lane_f16): Likewise. (vst4_lane_f32): Likewise. (vst4_lane_f64): Likewise. (vst4_lane_p8): Likewise. (vst4_lane_p16): Likewise. (vst4_lane_p64): Likewise. (vst4_lane_s8): Likewise. (vst4_lane_s16): Likewise. (vst4_lane_s32): Likewise. (vst4_lane_s64): Likewise. (vst4_lane_u8): Likewise. (vst4_lane_u16): Likewise. (vst4_lane_u32): Likewise. (vst4_lane_u64): Likewise. (vst4q_lane_f16): Likewise. (vst4q_lane_f32): Likewise. (vst4q_lane_f64): Likewise. (vst4q_lane_p8): Likewise. (vst4q_lane_p16): Likewise. (vst4q_lane_p64): Likewise. (vst4q_lane_s8): Likewise. (vst4q_lane_s16): Likewise. (vst4q_lane_s32): Likewise. (vst4q_lane_s64): Likewise. (vst4q_lane_u8): Likewise. (vst4q_lane_u16): Likewise. (vst4q_lane_u32): Likewise. (vst4q_lane_u64): Likewise. (vtbl3_s8): Likewise. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vld1_u8_x3): Likewise. (vld1_s8_x3): Likewise. (vld1_u16_x3): Likewise. (vld1_s16_x3): Likewise. (vld1_u32_x3): Likewise. (vld1_s32_x3): Likewise. (vld1_u64_x3): Likewise. (vld1_s64_x3): Likewise. (vld1_f16_x3): Likewise. (vld1_f32_x3): Likewise. (vld1_f64_x3): Likewise. (vld1_p8_x3): Likewise. (vld1_p16_x3): Likewise. (vld1_p64_x3): Likewise. (vld1q_u8_x3): Likewise. (vld1q_s8_x3): Likewise. (vld1q_u16_x3): Likewise. (vld1q_s16_x3): Likewise. (vld1q_u32_x3): Likewise. (vld1q_s32_x3): Likewise. (vld1q_u64_x3): Likewise. (vld1q_s64_x3): Likewise. (vld1q_f16_x3): Likewise. (vld1q_f32_x3): Likewise. (vld1q_f64_x3): Likewise. (vld1q_p8_x3): Likewise. (vld1q_p16_x3): Likewise. (vld1q_p64_x3): Likewise. (vld1_u8_x2): Likewise. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. (vld1_s8_x4): Likewise. (vld1q_s8_x4): Likewise. (vld1_s16_x4): Likewise. (vld1q_s16_x4): Likewise. (vld1_s32_x4): Likewise. (vld1q_s32_x4): Likewise. (vld1_u8_x4): Likewise. (vld1q_u8_x4): Likewise. (vld1_u16_x4): Likewise. (vld1q_u16_x4): Likewise. (vld1_u32_x4): Likewise. (vld1q_u32_x4): Likewise. (vld1_f16_x4): Likewise. (vld1q_f16_x4): Likewise. (vld1_f32_x4): Likewise. (vld1q_f32_x4): Likewise. (vld1_p8_x4): Likewise. 
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
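;; Expander for the st1_x3 family of stores: operand 0 is the base
;; address in a DImode register, so wrap it in a MEM of the
;; vector-tuple mode before handing off to the underlying
;; single-insn three-register ST1 store.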
(define_expand "aarch64_st1x3<vstruct_elt>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VSTRUCT_3QD 1 "register_operand")]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
  emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[1]));
  DONE;
})
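;; A minimal usage sketch (an illustration assumed from the arm_neon.h
;; intrinsics this expander backs, not taken from this file): for the
;; V3x4SF tuple mode, <vstruct_elt> resolves to the element-vector mode
;; name, so the builtin machinery would call a generator named along
;; the lines of gen_aarch64_st1x3v4sf.
;;
;;   #include <arm_neon.h>
;;
;;   void
;;   store3 (float32_t *p, float32x4x3_t v)
;;   {
;;     /* Stores v.val[0..2] to p with one three-register ST1,
;;        e.g. st1 {v0.4s - v2.4s}, [x0].  */
;;     vst1q_f32_x3 (p, v);
;;   }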
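
;; Store a block of three consecutive vector registers with a single ST1,
;; as generated for the vst1_<type>_x3 and vst1q_<type>_x3 intrinsics,
;; e.g. st1 {v0.4s - v2.4s}, [x0].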
(define_insn "aarch64_st1_x3_<vstruct_elt>"
[(set (match_operand:VSTRUCT_3QD 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:VSTRUCT_3QD
[(match_operand:VSTRUCT_3QD 1 "register_operand" "w")]
UNSPEC_ST1))]
"TARGET_SIMD"
"st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
[(set_attr "type" "neon_store1_3reg<q>")]
)
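
;; Expander for the vst1_<type>_x4 and vst1q_<type>_x4 intrinsics: operand 0
;; is the DImode store address, and the expansion emits the corresponding
;; aarch64_st1_x4_<vstruct_elt> store.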
(define_expand "aarch64_st1x4<vstruct_elt>"
  [(match_operand:DI 0 "register_operand" "")
(match_operand:VSTRUCT_4QD 1 "register_operand" "")]
"TARGET_SIMD"
{
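  /* Wrap the DImode base address (operand 0) in a MEM of the full
     vector-tuple mode, then emit the underlying st1 x4 store pattern
     for the tuple value in operand 1.  */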
rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[1]));
  DONE;
})
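
;; The expander body above wraps the address in operands[0] in a MEM of the
;; vector-tuple mode and defers to the single-insn store pattern below.  As
;; a rough C-level sketch (illustrative only; register choices are assumed,
;; not taken from this file), the arm_neon.h x4 intrinsics map onto these
;; ld1/st1 patterns as single four-register transfers:
;;
;;   int32x4x4_t v = vld1q_s32_x4 (src);   /* ld1 {v0.4s - v3.4s}, [x0] */
;;   vst1q_s32_x4 (dst, v);                /* st1 {v0.4s - v3.4s}, [x1] */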
(define_insn "aarch64_st1_x4_<vstruct_elt>"
  [(set (match_operand:VSTRUCT_4QD 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VSTRUCT_4QD
	  [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")]
	  UNSPEC_ST1))]
"TARGET_SIMD"
"st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
[(set_attr "type" "neon_store1_4reg<q>")]
)
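
;; Note on the output template above: the operand modifiers %S and %V print
;; the first and fourth registers of the four-register tuple in operand 1
;; (%T and %U name the middle two), so the pattern emits one multi-register
;; store of the form "st1 {vN.<T> - v(N+3).<T>}, [xM]".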
aarch64: Add machine modes for Neon vector-tuple types Until now, GCC has used large integer machine modes (OI, CI and XI) to model Neon vector-tuple types. This is suboptimal for many reasons, the most notable are: 1) Large integer modes are opaque and modifying one vector in the tuple requires a lot of inefficient set/get gymnastics. The result is a lot of superfluous move instructions. 2) Large integer modes do not map well to types that are tuples of 64-bit vectors - we need additional zero-padding which again results in superfluous move instructions. This patch adds new machine modes that better model the C-level Neon vector-tuple types. The approach is somewhat similar to that already used for SVE vector-tuple types. All of the AArch64 backend patterns and builtins that manipulate Neon vector tuples are updated to use the new machine modes. This has the effect of significantly reducing the amount of boiler-plate code in the arm_neon.h header. While this patch increases the quality of code generated in many instances, there is still room for significant improvement - which will be attempted in subsequent patches. gcc/ChangeLog: 2021-08-09 Jonathan Wright <jonathan.wright@arm.com> Richard Sandiford <richard.sandiford@arm.com> * config/aarch64/aarch64-builtins.c (v2x8qi_UP): Define. (v2x4hi_UP): Likewise. (v2x4hf_UP): Likewise. (v2x4bf_UP): Likewise. (v2x2si_UP): Likewise. (v2x2sf_UP): Likewise. (v2x1di_UP): Likewise. (v2x1df_UP): Likewise. (v2x16qi_UP): Likewise. (v2x8hi_UP): Likewise. (v2x8hf_UP): Likewise. (v2x8bf_UP): Likewise. (v2x4si_UP): Likewise. (v2x4sf_UP): Likewise. (v2x2di_UP): Likewise. (v2x2df_UP): Likewise. (v3x8qi_UP): Likewise. (v3x4hi_UP): Likewise. (v3x4hf_UP): Likewise. (v3x4bf_UP): Likewise. (v3x2si_UP): Likewise. (v3x2sf_UP): Likewise. (v3x1di_UP): Likewise. (v3x1df_UP): Likewise. (v3x16qi_UP): Likewise. (v3x8hi_UP): Likewise. (v3x8hf_UP): Likewise. (v3x8bf_UP): Likewise. (v3x4si_UP): Likewise. (v3x4sf_UP): Likewise. (v3x2di_UP): Likewise. (v3x2df_UP): Likewise. (v4x8qi_UP): Likewise. (v4x4hi_UP): Likewise. (v4x4hf_UP): Likewise. (v4x4bf_UP): Likewise. (v4x2si_UP): Likewise. (v4x2sf_UP): Likewise. (v4x1di_UP): Likewise. (v4x1df_UP): Likewise. (v4x16qi_UP): Likewise. (v4x8hi_UP): Likewise. (v4x8hf_UP): Likewise. (v4x8bf_UP): Likewise. (v4x4si_UP): Likewise. (v4x4sf_UP): Likewise. (v4x2di_UP): Likewise. (v4x2df_UP): Likewise. (TYPES_GETREGP): Delete. (TYPES_SETREGP): Likewise. (TYPES_LOADSTRUCT_U): Define. (TYPES_LOADSTRUCT_P): Likewise. (TYPES_LOADSTRUCT_LANE_U): Likewise. (TYPES_LOADSTRUCT_LANE_P): Likewise. (TYPES_STORE1P): Move for consistency. (TYPES_STORESTRUCT_U): Define. (TYPES_STORESTRUCT_P): Likewise. (TYPES_STORESTRUCT_LANE_U): Likewise. (TYPES_STORESTRUCT_LANE_P): Likewise. (aarch64_simd_tuple_types): Define. (aarch64_lookup_simd_builtin_type): Handle tuple type lookup. (aarch64_init_simd_builtin_functions): Update frontend lookup for builtin functions after handling arm_neon.h pragma. (register_tuple_type): Manually set modes of single-integer tuple types. Record tuple types. * config/aarch64/aarch64-modes.def (ADV_SIMD_D_REG_STRUCT_MODES): Define D-register tuple modes. (ADV_SIMD_Q_REG_STRUCT_MODES): Define Q-register tuple modes. (SVE_MODES): Give single-vector modes priority over vector- tuple modes. (VECTOR_MODES_WITH_PREFIX): Set partial-vector mode order to be after all single-vector modes. * config/aarch64/aarch64-simd-builtins.def: Update builtin generator macros to reflect modifications to the backend patterns. 
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2<vstruct_elt>): This. (aarch64_simd_ld2r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2r<vstruct_elt>): This. (aarch64_vec_load_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_load_lanes<mode>_lane<vstruct_elt>): This. (vec_load_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanes<mode><vstruct_elt>): This. (aarch64_simd_st2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st2<vstruct_elt>): This. (aarch64_vec_store_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_store_lanes<mode>_lane<vstruct_elt>): This. (vec_store_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanes<mode><vstruct_elt>): This. (aarch64_simd_ld3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3<vstruct_elt>): This. (aarch64_simd_ld3r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3r<vstruct_elt>): This. (aarch64_vec_load_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesci<mode>): This. (aarch64_simd_st3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st3<vstruct_elt>): This. (aarch64_vec_store_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesci<mode>): This. (aarch64_simd_ld4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4<vstruct_elt>): This. (aarch64_simd_ld4r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4r<vstruct_elt>): This. (aarch64_vec_load_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesxi<mode>): This. (aarch64_simd_st4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st4<vstruct_elt>): This. (aarch64_vec_store_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesxi<mode>): This. (mov<mode>): Define for Neon vector-tuple modes. (aarch64_ld1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x3<vstruct_elt>): This. (aarch64_ld1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x3_<vstruct_elt>): This. (aarch64_ld1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x4<vstruct_elt>): This. (aarch64_ld1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x4_<vstruct_elt>): This. (aarch64_st1x2<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x2<vstruct_elt>): This. (aarch64_st1_x2_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x2_<vstruct_elt>): This. (aarch64_st1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x3<vstruct_elt>): This. (aarch64_st1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x3_<vstruct_elt>): This. (aarch64_st1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x4<vstruct_elt>): This. (aarch64_st1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x4_<vstruct_elt>): This. (*aarch64_mov<mode>): Define for vector-tuple modes. (*aarch64_be_mov<mode>): Likewise. (aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs>r<vstruct_elt>): This. (aarch64_ld2<mode>_dreg): Use vector-tuple mode iterator and rename to... 
(aarch64_ld2<vstruct_elt>_dreg): This. (aarch64_ld3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld3<vstruct_elt>_dreg): This. (aarch64_ld4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld4<vstruct_elt>_dreg): This. (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs><vstruct_elt>): Use vector-tuple mode iterator and rename to... (aarch64_ld<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode (aarch64_ld1x2<VQ:mode>): Delete. (aarch64_ld1x2<VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x2<vstruct_elt>): This. (aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_ld<nregs>_lane<vstruct_elt>): This. (aarch64_get_dreg<VSTRUCT:mode><VDC:mode>): Delete. (aarch64_get_qreg<VSTRUCT:mode><VQ:mode>): Likewise. (aarch64_st2<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st2<vstruct_elt>_dreg): This. (aarch64_st3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st3<vstruct_elt>_dreg): This. (aarch64_st4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st4<vstruct_elt>_dreg): This. (aarch64_st<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st<nregs><vstruct_elt>): This. (aarch64_st<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode iterator and rename to aarch64_st<nregs><vstruct_elt>. (aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_st<nregs>_lane<vstruct_elt>): This. (aarch64_set_qreg<VSTRUCT:mode><VQ:mode>): Delete. (aarch64_simd_ld1<mode>_x2): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld1<vstruct_elt>_x2): This. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Refactor to include new vector-tuple modes. (aarch64_classify_vector_mode): Add cases for new vector- tuple modes. (aarch64_advsimd_partial_struct_mode_p): Define. (aarch64_advsimd_full_struct_mode_p): Likewise. (aarch64_advsimd_vector_array_mode): Likewise. (aarch64_sve_data_mode): Change location in file. (aarch64_array_mode): Handle case of Neon vector-tuple modes. (aarch64_hard_regno_nregs): Handle case of partial Neon vector structures. (aarch64_classify_address): Refactor to include handling of Neon vector-tuple modes. (aarch64_print_operand): Print "d" for "%R" for a partial Neon vector structure. (aarch64_expand_vec_perm_1): Use new vector-tuple mode. (aarch64_modes_tieable_p): Prevent tieing Neon partial struct modes with scalar machines modes larger than 8 bytes. (aarch64_can_change_mode_class): Don't allow changes between partial and full Neon vector-structure modes. * config/aarch64/arm_neon.h (vst2_lane_f16): Use updated builtin and remove boiler-plate code for opaque mode. (vst2_lane_f32): Likewise. (vst2_lane_f64): Likewise. (vst2_lane_p8): Likewise. (vst2_lane_p16): Likewise. (vst2_lane_p64): Likewise. (vst2_lane_s8): Likewise. (vst2_lane_s16): Likewise. (vst2_lane_s32): Likewise. (vst2_lane_s64): Likewise. (vst2_lane_u8): Likewise. (vst2_lane_u16): Likewise. (vst2_lane_u32): Likewise. (vst2_lane_u64): Likewise. (vst2q_lane_f16): Likewise. (vst2q_lane_f32): Likewise. (vst2q_lane_f64): Likewise. (vst2q_lane_p8): Likewise. (vst2q_lane_p16): Likewise. (vst2q_lane_p64): Likewise. (vst2q_lane_s8): Likewise. (vst2q_lane_s16): Likewise. (vst2q_lane_s32): Likewise. (vst2q_lane_s64): Likewise. (vst2q_lane_u8): Likewise. (vst2q_lane_u16): Likewise. (vst2q_lane_u32): Likewise. 
(vst2q_lane_u64): Likewise. (vst3_lane_f16): Likewise. (vst3_lane_f32): Likewise. (vst3_lane_f64): Likewise. (vst3_lane_p8): Likewise. (vst3_lane_p16): Likewise. (vst3_lane_p64): Likewise. (vst3_lane_s8): Likewise. (vst3_lane_s16): Likewise. (vst3_lane_s32): Likewise. (vst3_lane_s64): Likewise. (vst3_lane_u8): Likewise. (vst3_lane_u16): Likewise. (vst3_lane_u32): Likewise. (vst3_lane_u64): Likewise. (vst3q_lane_f16): Likewise. (vst3q_lane_f32): Likewise. (vst3q_lane_f64): Likewise. (vst3q_lane_p8): Likewise. (vst3q_lane_p16): Likewise. (vst3q_lane_p64): Likewise. (vst3q_lane_s8): Likewise. (vst3q_lane_s16): Likewise. (vst3q_lane_s32): Likewise. (vst3q_lane_s64): Likewise. (vst3q_lane_u8): Likewise. (vst3q_lane_u16): Likewise. (vst3q_lane_u32): Likewise. (vst3q_lane_u64): Likewise. (vst4_lane_f16): Likewise. (vst4_lane_f32): Likewise. (vst4_lane_f64): Likewise. (vst4_lane_p8): Likewise. (vst4_lane_p16): Likewise. (vst4_lane_p64): Likewise. (vst4_lane_s8): Likewise. (vst4_lane_s16): Likewise. (vst4_lane_s32): Likewise. (vst4_lane_s64): Likewise. (vst4_lane_u8): Likewise. (vst4_lane_u16): Likewise. (vst4_lane_u32): Likewise. (vst4_lane_u64): Likewise. (vst4q_lane_f16): Likewise. (vst4q_lane_f32): Likewise. (vst4q_lane_f64): Likewise. (vst4q_lane_p8): Likewise. (vst4q_lane_p16): Likewise. (vst4q_lane_p64): Likewise. (vst4q_lane_s8): Likewise. (vst4q_lane_s16): Likewise. (vst4q_lane_s32): Likewise. (vst4q_lane_s64): Likewise. (vst4q_lane_u8): Likewise. (vst4q_lane_u16): Likewise. (vst4q_lane_u32): Likewise. (vst4q_lane_u64): Likewise. (vtbl3_s8): Likewise. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vld1_u8_x3): Likewise. (vld1_s8_x3): Likewise. (vld1_u16_x3): Likewise. (vld1_s16_x3): Likewise. (vld1_u32_x3): Likewise. (vld1_s32_x3): Likewise. (vld1_u64_x3): Likewise. (vld1_s64_x3): Likewise. (vld1_f16_x3): Likewise. (vld1_f32_x3): Likewise. (vld1_f64_x3): Likewise. (vld1_p8_x3): Likewise. (vld1_p16_x3): Likewise. (vld1_p64_x3): Likewise. (vld1q_u8_x3): Likewise. (vld1q_s8_x3): Likewise. (vld1q_u16_x3): Likewise. (vld1q_s16_x3): Likewise. (vld1q_u32_x3): Likewise. (vld1q_s32_x3): Likewise. (vld1q_u64_x3): Likewise. (vld1q_s64_x3): Likewise. (vld1q_f16_x3): Likewise. (vld1q_f32_x3): Likewise. (vld1q_f64_x3): Likewise. (vld1q_p8_x3): Likewise. (vld1q_p16_x3): Likewise. (vld1q_p64_x3): Likewise. (vld1_u8_x2): Likewise. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. (vld1_s8_x4): Likewise. (vld1q_s8_x4): Likewise. (vld1_s16_x4): Likewise. (vld1q_s16_x4): Likewise. (vld1_s32_x4): Likewise. (vld1q_s32_x4): Likewise. (vld1_u8_x4): Likewise. (vld1q_u8_x4): Likewise. (vld1_u16_x4): Likewise. (vld1q_u16_x4): Likewise. (vld1_u32_x4): Likewise. (vld1q_u32_x4): Likewise. (vld1_f16_x4): Likewise. (vld1q_f16_x4): Likewise. (vld1_f32_x4): Likewise. (vld1q_f32_x4): Likewise. (vld1_p8_x4): Likewise. 
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
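
;; Move a vector-tuple value in the Q/D structure modes on little-endian
;; targets.  A register-to-register copy is emitted as "#" and split later
;; into one move per constituent vector (hence the <insn_count> length);
;; the memory alternatives transfer the whole register list with a single
;; ST1 or LD1.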
(define_insn "*aarch64_mov<mode>"
[(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
(match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand" " w,w,Utv"))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"@
#
st1\\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
ld1\\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1"
[(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
neon_load<nregs>_<nregs>reg_q")
(set_attr "length" "<insn_count>,4,4")]
)
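
;; As above, but for the legacy opaque structure modes OI, CI and XI,
;; where the register list is always printed with the .16b arrangement.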
(define_insn "*aarch64_mov<mode>"
[(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"@
#
st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
[(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
neon_load<nregs>_<nregs>reg_q")
(set_attr "length" "<insn_count>,4,4")]
)
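
;; Move the V8DI value underlying the LS64 data512_t type through
;; general-purpose registers.  Every alternative is emitted as "#" and
;; split later; the lengths correspond to eight MOVs for a register
;; copy and four load/store-pair instructions for the memory
;; alternatives.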
(define_insn "*aarch64_movv8di"
[(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
(match_operand:V8DI 1 "general_operand" " r,r,m"))]
"(register_operand (operands[0], V8DImode)
|| register_operand (operands[1], V8DImode))"
"#"
[(set_attr "type" "multiple,multiple,multiple")
(set_attr "length" "32,16,16")]
)
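
;; A big-endian LD1 of a single vector.  The unspec ties the pattern to
;; the LD1 element ordering, which differs from that of a plain LDR on
;; big-endian targets.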
(define_insn "aarch64_be_ld1<mode>"
[(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
"aarch64_simd_struct_operand" "Utv")]
UNSPEC_LD1))]
"TARGET_SIMD"
"ld1\\t{%0<Vmtype>}, %1"
[(set_attr "type" "neon_load1_1reg<q>")]
)
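
;; The store counterpart: a big-endian ST1 of a single vector, again
;; wrapped in an unspec to fix the element ordering.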
(define_insn "aarch64_be_st1<mode>"
[(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
UNSPEC_ST1))]
"TARGET_SIMD"
"st1\\t{%1<Vmtype>}, %0"
[(set_attr "type" "neon_store1_1reg<q>")]
)
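
;; Big-endian move of a two-vector tuple in the D-register modes.  The
;; memory alternatives use STP/LDP of the two D registers; %R names the
;; second register of the pair.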
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand" "=w,m,w")
(match_operand:VSTRUCT_2D 1 "general_operand" " w,w,m"))]
"TARGET_SIMD && BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"@
#
stp\\t%d1, %R1, %0
ldp\\t%d0, %R0, %1"
[(set_attr "type" "multiple,neon_stp,neon_ldp")
(set_attr "length" "8,4,4")]
)
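
;; Likewise for two-vector tuples in the Q-register modes, using STP/LDP
;; of Q registers.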
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand" "=w,m,w")
(match_operand:VSTRUCT_2Q 1 "general_operand" " w,w,m"))]
"TARGET_SIMD && BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"@
#
stp\\t%q1, %R1, %0
ldp\\t%q0, %R0, %1"
[(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
(set_attr "length" "8,4,4")]
)
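
;; Likewise for the legacy OImode pair of Q registers.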
(define_insn "*aarch64_be_movoi"
[(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
(match_operand:OI 1 "general_operand" " w,w,m"))]
"TARGET_SIMD && BYTES_BIG_ENDIAN
&& (register_operand (operands[0], OImode)
|| register_operand (operands[1], OImode))"
"@
#
stp\\t%q1, %R1, %0
ldp\\t%q0, %R0, %1"
[(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
(set_attr "length" "8,4,4")]
)
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
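
The big-endian tuple-move patterns below all follow the same shape: a "#" template standing for a short multi-instruction sequence materialised after reload, with the "length" attribute recording the byte size of that sequence for each alternative. For orientation, here is a minimal C sketch (function name illustrative) of the source-level tuple type that a three-Q-register structure mode such as V3x4SF models:

#include <arm_neon.h>

/* Illustrative sketch: float32x4x3_t is the C-level view of a
   three-Q-register tuple (V3x4SFmode after this change; previously a
   single opaque CImode value).  */
float32x4_t
sum3 (const float *p)
{
  float32x4x3_t t = vld3q_f32 (p);  /* one LD3 into three Q registers */
  return vaddq_f32 (vaddq_f32 (t.val[0], t.val[1]), t.val[2]);
}
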
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
(match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
"TARGET_SIMD && BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"#"
[(set_attr "type" "multiple")
(set_attr "length" "12,8,8")]
)
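
The register-to-register alternative of the VSTRUCT_3QD pattern above splits into one vector MOV per tuple member, i.e. three 4-byte instructions, which is where the "12" in the length attribute comes from. A hedged C sketch (function name illustrative) of code that would exercise that alternative when compiled for aarch64_be:

#include <arm_neon.h>

/* Under the AAPCS64 parameter-passing rules, 'a' typically arrives in
   v0-v2 and 'b' in v3-v5, while the return value is expected back in
   v0-v2, so returning 'b' requires a whole-tuple register move.  */
float32x4x3_t
pick_second (float32x4x3_t a, float32x4x3_t b)
{
  (void) a;
  return b;
}
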
(define_insn "*aarch64_be_movci"
[(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
(match_operand:CI 1 "general_operand" " w,w,o"))]
"TARGET_SIMD && BYTES_BIG_ENDIAN
&& (register_operand (operands[0], CImode)
|| register_operand (operands[1], CImode))"
"#"
[(set_attr "type" "multiple")
(set_attr "length" "12,4,4")]
)
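
The CImode variant above is retained as the legacy opaque path alongside the new modes. The benefit of the dedicated modes is clearest for tuples of 64-bit vectors: as V3x2SF a float32x2x3_t occupies 24 bytes (the low halves of three registers), whereas the old scheme had to widen it to a zero-padded CImode value spanning three full Q registers (48 bytes). A short sketch (function name illustrative):

#include <arm_neon.h>

/* Illustrative sketch: a tuple of three 64-bit vectors, now modelled
   directly as V3x2SFmode instead of a padded CImode value.  */
float32x2x3_t
load3_d (const float *p)
{
  return vld3_f32 (p);  /* one LD3 into three D registers */
}
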
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
(match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
"TARGET_SIMD && BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"#"
[(set_attr "type" "multiple")
(set_attr "length" "16,8,8")]
)
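
For the four-vector VSTRUCT_4QD tuples the register-to-register split is four vector MOVs, hence length "16". The per-vector granularity of the new modes is also what makes updating a single tuple member cheap; under the old opaque representation this was the set/get gymnastics the commit message describes. A hedged C sketch (function name illustrative):

#include <arm_neon.h>

/* Illustrative sketch: rewriting one member of a four-Q-register tuple
   (V4x4SFmode).  With per-vector structure only t.val[2] is touched;
   as a single opaque XImode value the whole 64 bytes had to be
   unpacked and repacked.  */
float32x4x4_t
negate_third (float32x4x4_t t)
{
  t.val[2] = vnegq_f32 (t.val[2]);
  return t;
}
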
(define_insn "*aarch64_be_movxi"
[(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
(match_operand:XI 1 "general_operand" " w,w,o"))]
"TARGET_SIMD && BYTES_BIG_ENDIAN
&& (register_operand (operands[0], XImode)
|| register_operand (operands[1], XImode))"
"#"
[(set_attr "type" "multiple")
(set_attr "length" "16,4,4")]
)
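
XImode, the largest of the legacy opaque integer modes, is a single 512-bit value covering four Q registers; the pattern above keeps it working on big-endian targets alongside the new V4x-prefixed modes. A final sketch (function name illustrative) of the kind of 64-byte tuple value that these whole-tuple moves copy when it is spilled or reloaded:

#include <arm_neon.h>

/* Illustrative sketch: a four-Q-register tuple round-tripped through
   memory.  Whole-tuple copies of such a value are what the "#" move
   patterns above expand to after reload.  */
void
copy4 (float *dst, const float *src)
{
  float32x4x4_t t = vld4q_f32 (src);
  vst4q_f32 (dst, t);
}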
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
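
;; Split a register-to-register move of a two-vector tuple into separate
;; moves of the constituent vectors once reload has completed.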
(define_split
[(set (match_operand:VSTRUCT_2QD 0 "register_operand")
(match_operand:VSTRUCT_2QD 1 "register_operand"))]
"TARGET_SIMD && reload_completed"
[(const_int 0)]
{
aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
DONE;
})
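
;; Likewise for OImode, the opaque large-integer representation of a pair
;; of 128-bit vectors: copy it as two TImode registers.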
(define_split
[(set (match_operand:OI 0 "register_operand")
(match_operand:OI 1 "register_operand"))]
"TARGET_SIMD && reload_completed"
[(const_int 0)]
{
aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
DONE;
})
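
;; Split a move of a three-vector tuple.  A register-to-register copy
;; becomes three moves of the element mode.  On big-endian targets, any
;; other copy is split into a move of the first two vectors as a pair
;; followed by a move of the remaining vector; otherwise the split FAILs
;; and the ordinary move pattern handles the copy.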
(define_split
[(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
(match_operand:VSTRUCT_3QD 1 "general_operand"))]
"TARGET_SIMD && reload_completed"
[(const_int 0)]
{
if (register_operand (operands[0], <MODE>mode)
&& register_operand (operands[1], <MODE>mode))
{
aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
DONE;
}
else if (BYTES_BIG_ENDIAN)
{
int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
<MODE>mode, 0),
simplify_gen_subreg (pair_mode, operands[1],
<MODE>mode, 0));
emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
simplify_gen_subreg (<VSTRUCT_ELT>mode,
operands[0],
<MODE>mode,
2 * elt_size)),
gen_lowpart (<VSTRUCT_ELT>mode,
simplify_gen_subreg (<VSTRUCT_ELT>mode,
operands[1],
<MODE>mode,
2 * elt_size)));
DONE;
}
else
FAIL;
})
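
;; Likewise for CImode, the large-integer representation of three 128-bit
;; vectors: three TImode register moves, or on big-endian an OImode move
;; followed by a move of the final vector.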
(define_split
[(set (match_operand:CI 0 "nonimmediate_operand")
(match_operand:CI 1 "general_operand"))]
"TARGET_SIMD && reload_completed"
[(const_int 0)]
{
  if (register_operand (operands[0], CImode)
      && register_operand (operands[1], CImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
                      simplify_gen_subreg (OImode, operands[1], CImode, 0));
      emit_move_insn (gen_lowpart (V16QImode,
                                   simplify_gen_subreg (TImode, operands[0],
                                                        CImode, 32)),
                      gen_lowpart (V16QImode,
                                   simplify_gen_subreg (TImode, operands[1],
                                                        CImode, 32)));
      DONE;
    }
  else
    FAIL;
})
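;; Split a four-vector tuple (VSTRUCT_4QD) move once reload has completed.
;; Register-to-register moves are expanded one vector at a time; on
;; big-endian targets a memory move is split into two pair-mode moves
;; (V2x16QI for Q-register tuples, V2x8QI for D-register tuples) covering
;; the low and high halves of the tuple.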
(define_split
  [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
        (match_operand:VSTRUCT_4QD 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], <MODE>mode)
      && register_operand (operands[1], <MODE>mode))
    {
      aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
      machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
      emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
                                           <MODE>mode, 0),
                      simplify_gen_subreg (pair_mode, operands[1],
                                           <MODE>mode, 0));
      emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
                                           <MODE>mode, 2 * elt_size),
                      simplify_gen_subreg (pair_mode, operands[1],
                                           <MODE>mode, 2 * elt_size));
      DONE;
    }
  else
    FAIL;
})
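;; Split an XImode (four Q-register tuple) move once reload has completed.
;; Register-to-register moves become four TImode moves; on big-endian
;; targets a memory move is split into two OImode moves at offsets 0 and 32.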
(define_split
  [(set (match_operand:XI 0 "nonimmediate_operand")
        (match_operand:XI 1 "general_operand"))]
"TARGET_SIMD && reload_completed"
[(const_int 0)]
{
  if (register_operand (operands[0], XImode)
      && register_operand (operands[1], XImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
                      simplify_gen_subreg (OImode, operands[1], XImode, 0));
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
                      simplify_gen_subreg (OImode, operands[1], XImode, 32));
      DONE;
    }
  else
    FAIL;
})
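;; Split a V8DImode (64-byte LS64 value) move once reload has completed.
;; Register-to-register moves become eight DImode moves, and moves between
;; a register and memory are split into four 16-byte TImode moves.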
(define_split
  [(set (match_operand:V8DI 0 "nonimmediate_operand")
        (match_operand:V8DI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], V8DImode)
      && register_operand (operands[1], V8DImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
      DONE;
    }
  else if ((register_operand (operands[0], V8DImode)
            && memory_operand (operands[1], V8DImode))
           || (memory_operand (operands[0], V8DImode)
               && register_operand (operands[1], V8DImode)))
    {
      for (int offset = 0; offset < 64; offset += 16)
        emit_move_insn (simplify_gen_subreg (TImode, operands[0],
                                             V8DImode, offset),
                        simplify_gen_subreg (TImode, operands[1],
                                             V8DImode, offset));
      DONE;
    }
  else
    FAIL;
})
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
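;; Expander for the LD<n>R (load single structure and replicate)
;; patterns.  Operand 0 is the destination vector-tuple register and
;; operand 1 is the pointer to load from.  The expander wraps the
;; pointer in a BLKmode MEM and records the number of bytes read:
;; one element per register, i.e. <nregs> times the element size.
;;
;; A minimal usage sketch (illustrative assumption, not part of this
;; file): with arm_neon.h,
;;
;;   int32x2x2_t f (const int32_t *p) { return vld2_dup_s32 (p); }
;;
;; is expected to reach this expander via the ld2r builtin and emit
;; something along the lines of
;;
;;   ld2r {v0.2s, v1.2s}, [x0]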
(define_expand "aarch64_ld<nregs>r<vstruct_elt>"
[(match_operand:VSTRUCT_QD 0 "register_operand")
(match_operand:DI 1 "register_operand")]
"TARGET_SIMD"
{
rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
  emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[0], mem));
  DONE;
})
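
;; A minimal C-level illustration (not part of this file, register names
;; are whatever the allocator picks): the arm_neon.h vld2_dup_s8 intrinsic
;; reaches the load-and-replicate expander above, which emits the
;; corresponding aarch64_simd_ld2r pattern:
;;
;;   #include <arm_neon.h>
;;
;;   int8x8x2_t
;;   dup_two (const int8_t *p)
;;   {
;;     return vld2_dup_s8 (p);   /* Typically: ld2r {v0.8b - v1.8b}, [x0]  */
;;   }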
(define_insn "aarch64_ld2<vstruct_elt>_dreg"
[(set (match_operand:VSTRUCT_2DNX 0 "register_operand" "=w")
(unspec:VSTRUCT_2DNX [
(match_operand:VSTRUCT_2DNX 1 "aarch64_simd_struct_operand" "Utv")]
UNSPEC_LD2_DREG))]
[AArch64] Remove use of wider vector modes The AArch64 port defined x2, x3 and x4 vector modes that were only used in the rtl for the AdvSIMD LD{2,3,4} patterns. It seems unlikely that this rtl would have led to any valid simplifications, since the values involved were unspecs that had a different number of operands from the non-dreg versions. (The dreg UNSPEC_LD2 had a single operand, while the qreg one had two operands.) As it happened, the patterns led to invalid simplifications on big- endian targets due to a mix-up in the operand order, see Tamar's fix in r240271. This patch therefore replaces the rtl patterns with dedicated unspecs. This allows the x2, x3 and x4 modes to be removed, avoiding a clash with 256-bit and 512-bit SVE. 2017-08-22 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * config/aarch64/aarch64-modes.def: Remove 32-, 48- and 64-byte vector modes. * config/aarch64/iterators.md (VRL2, VRL3, VRL4): Delete. * config/aarch64/aarch64.md (UNSPEC_LD2_DREG, UNSPEC_LD3_DREG) (UNSPEC_LD4_DREG): New unspecs. * config/aarch64/aarch64-simd.md (aarch64_ld2<mode>_dreg_le) (aarch64_ld2<mode>_dreg_be): Replace with... (aarch64_ld2<mode>_dreg): ...this pattern and use the new DREG unspec. (aarch64_ld3<mode>_dreg_le) (aarch64_ld3<mode>_dreg_be): Replace with... (aarch64_ld3<mode>_dreg): ...this pattern and use the new DREG unspec. (aarch64_ld4<mode>_dreg_le) (aarch64_ld4<mode>_dreg_be): Replace with... (aarch64_ld4<mode>_dreg): ...this pattern and use the new DREG unspec. Co-Authored-By: Alan Hayward <alan.hayward@arm.com> Co-Authored-By: David Sherwood <david.sherwood@arm.com> From-SVN: r251555
2017-08-31 11:51:40 +02:00
"TARGET_SIMD"
AArch64 [3/10] 2012-10-23 Ian Bolton <ian.bolton@arm.com> James Greenhalgh <james.greenhalgh@arm.com> Jim MacArthur <jim.macarthur@arm.com> Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Marcus Shawcroft <marcus.shawcroft@arm.com> Nigel Stephens <nigel.stephens@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Richard Earnshaw <rearnsha@arm.com> Sofiane Naci <sofiane.naci@arm.com> Stephen Thomas <stephen.thomas@arm.com> Tejas Belagod <tejas.belagod@arm.com> Yufeng Zhang <yufeng.zhang@arm.com> * common/config/aarch64/aarch64-common.c: New file. * config/aarch64/aarch64-arches.def: New file. * config/aarch64/aarch64-builtins.c: New file. * config/aarch64/aarch64-cores.def: New file. * config/aarch64/aarch64-elf-raw.h: New file. * config/aarch64/aarch64-elf.h: New file. * config/aarch64/aarch64-generic.md: New file. * config/aarch64/aarch64-linux.h: New file. * config/aarch64/aarch64-modes.def: New file. * config/aarch64/aarch64-option-extensions.def: New file. * config/aarch64/aarch64-opts.h: New file. * config/aarch64/aarch64-protos.h: New file. * config/aarch64/aarch64-simd.md: New file. * config/aarch64/aarch64-tune.md: New file. * config/aarch64/aarch64.c: New file. * config/aarch64/aarch64.h: New file. * config/aarch64/aarch64.md: New file. * config/aarch64/aarch64.opt: New file. * config/aarch64/arm_neon.h: New file. * config/aarch64/constraints.md: New file. * config/aarch64/gentune.sh: New file. * config/aarch64/iterators.md: New file. * config/aarch64/large.md: New file. * config/aarch64/predicates.md: New file. * config/aarch64/small.md: New file. * config/aarch64/sync.md: New file. * config/aarch64/t-aarch64-linux: New file. * config/aarch64/t-aarch64: New file. Co-Authored-By: Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com> Co-Authored-By: Jim MacArthur <jim.macarthur@arm.com> Co-Authored-By: Marcus Shawcroft <marcus.shawcroft@arm.com> Co-Authored-By: Nigel Stephens <nigel.stephens@arm.com> Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Co-Authored-By: Richard Earnshaw <rearnsha@arm.com> Co-Authored-By: Sofiane Naci <sofiane.naci@arm.com> Co-Authored-By: Stephen Thomas <stephen.thomas@arm.com> Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com> Co-Authored-By: Yufeng Zhang <yufeng.zhang@arm.com> From-SVN: r192723
2012-10-23 19:02:30 +02:00
"ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
[(set_attr "type" "neon_load2_2reg<q>")]
)
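
;; An illustrative sketch (not part of this file) of an intrinsic that is
;; matched by the _dreg pattern above for D-register modes:
;;
;;   #include <arm_neon.h>
;;
;;   int8x8x2_t
;;   load_two (const int8_t *p)
;;   {
;;     return vld2_s8 (p);   /* Typically: ld2 {v0.8b - v1.8b}, [x0]  */
;;   }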
AArch64 [3/10] 2012-10-23 Ian Bolton <ian.bolton@arm.com> James Greenhalgh <james.greenhalgh@arm.com> Jim MacArthur <jim.macarthur@arm.com> Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Marcus Shawcroft <marcus.shawcroft@arm.com> Nigel Stephens <nigel.stephens@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Richard Earnshaw <rearnsha@arm.com> Sofiane Naci <sofiane.naci@arm.com> Stephen Thomas <stephen.thomas@arm.com> Tejas Belagod <tejas.belagod@arm.com> Yufeng Zhang <yufeng.zhang@arm.com> * common/config/aarch64/aarch64-common.c: New file. * config/aarch64/aarch64-arches.def: New file. * config/aarch64/aarch64-builtins.c: New file. * config/aarch64/aarch64-cores.def: New file. * config/aarch64/aarch64-elf-raw.h: New file. * config/aarch64/aarch64-elf.h: New file. * config/aarch64/aarch64-generic.md: New file. * config/aarch64/aarch64-linux.h: New file. * config/aarch64/aarch64-modes.def: New file. * config/aarch64/aarch64-option-extensions.def: New file. * config/aarch64/aarch64-opts.h: New file. * config/aarch64/aarch64-protos.h: New file. * config/aarch64/aarch64-simd.md: New file. * config/aarch64/aarch64-tune.md: New file. * config/aarch64/aarch64.c: New file. * config/aarch64/aarch64.h: New file. * config/aarch64/aarch64.md: New file. * config/aarch64/aarch64.opt: New file. * config/aarch64/arm_neon.h: New file. * config/aarch64/constraints.md: New file. * config/aarch64/gentune.sh: New file. * config/aarch64/iterators.md: New file. * config/aarch64/large.md: New file. * config/aarch64/predicates.md: New file. * config/aarch64/small.md: New file. * config/aarch64/sync.md: New file. * config/aarch64/t-aarch64-linux: New file. * config/aarch64/t-aarch64: New file. Co-Authored-By: Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com> Co-Authored-By: Jim MacArthur <jim.macarthur@arm.com> Co-Authored-By: Marcus Shawcroft <marcus.shawcroft@arm.com> Co-Authored-By: Nigel Stephens <nigel.stephens@arm.com> Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Co-Authored-By: Richard Earnshaw <rearnsha@arm.com> Co-Authored-By: Sofiane Naci <sofiane.naci@arm.com> Co-Authored-By: Stephen Thomas <stephen.thomas@arm.com> Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com> Co-Authored-By: Yufeng Zhang <yufeng.zhang@arm.com> From-SVN: r192723
2012-10-23 19:02:30 +02:00
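;; Load a pair of 64-bit-element ("1d") vectors into two D registers.
;; The VSTRUCT_2DX modes are the two-vector D-register tuples whose
;; vectors hold a single 64-bit element (e.g. V2x1DI).  LD2 has no 1D
;; arrangement, and de-interleaving two one-element vectors is a no-op,
;; so a plain two-register LD1 is emitted instead; %S0 and %T0 print
;; the first and second registers of the tuple operand.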
(define_insn "aarch64_ld2<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_2DX 0 "register_operand" "=w")
        (unspec:VSTRUCT_2DX [
          (match_operand:VSTRUCT_2DX 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD2_DREG))]
"TARGET_SIMD"
"ld1\\t{%S0.1d - %T0.1d}, %1"
[(set_attr "type" "neon_load1_2reg<q>")]
)
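
;; Illustrative usage (a sketch, assuming the arm_neon.h intrinsic
;; vld2_s64 expands through the pattern above for V2x1DI):
;;
;;   #include <arm_neon.h>
;;
;;   int64x1x2_t
;;   load_pair (const int64_t *p)
;;   {
;;     return vld2_s64 (p);   /* e.g. ld1 {v0.1d - v1.1d}, [x0] */
;;   }
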
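;; Load three D-register vectors with de-interleaving.  The
;; VSTRUCT_3DNX modes are the three-vector D-register tuples with more
;; than one element per vector (e.g. V3x8QI), so a genuine LD3
;; arrangement exists; %S0 through %U0 print the three consecutive
;; registers of the tuple operand.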
(define_insn "aarch64_ld3<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_3DNX 0 "register_operand" "=w")
        (unspec:VSTRUCT_3DNX [
          (match_operand:VSTRUCT_3DNX 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD3_DREG))]
"TARGET_SIMD"
"ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
[(set_attr "type" "neon_load3_3reg<q>")]
)
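
;; Illustrative usage (a sketch, assuming vld3_s8 expands through the
;; pattern above for V3x8QI):
;;
;;   #include <arm_neon.h>
;;
;;   int8x8x3_t
;;   load_triple (const int8_t *p)
;;   {
;;     return vld3_s8 (p);   /* e.g. ld3 {v0.8b - v2.8b}, [x0] */
;;   }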
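
;; For the single-element tuple modes in VSTRUCT_3DX (V3x1DI and V3x1DF)
;; there is no "ld3" form with a .1d arrangement; since each vector holds
;; a single 64-bit element, de-interleaving is a no-op and a plain LD1 of
;; three consecutive D registers has the same effect.  As an illustrative
;; (not normative) example, an intrinsic such as vld3_s64 loading an
;; int64x1x3_t could expand via this pattern and emit:
;;   ld1 {v0.1d - v2.1d}, [x0]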
(define_insn "aarch64_ld3<vstruct_elt>_dreg"
[(set (match_operand:VSTRUCT_3DX 0 "register_operand" "=w")
(unspec:VSTRUCT_3DX [
(match_operand:VSTRUCT_3DX 1 "aarch64_simd_struct_operand" "Utv")]
UNSPEC_LD3_DREG))]
"TARGET_SIMD"
"ld1\\t{%S0.1d - %U0.1d}, %1"
[(set_attr "type" "neon_load1_3reg<q>")]
)
AArch64 [3/10] 2012-10-23 Ian Bolton <ian.bolton@arm.com> James Greenhalgh <james.greenhalgh@arm.com> Jim MacArthur <jim.macarthur@arm.com> Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Marcus Shawcroft <marcus.shawcroft@arm.com> Nigel Stephens <nigel.stephens@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Richard Earnshaw <rearnsha@arm.com> Sofiane Naci <sofiane.naci@arm.com> Stephen Thomas <stephen.thomas@arm.com> Tejas Belagod <tejas.belagod@arm.com> Yufeng Zhang <yufeng.zhang@arm.com> * common/config/aarch64/aarch64-common.c: New file. * config/aarch64/aarch64-arches.def: New file. * config/aarch64/aarch64-builtins.c: New file. * config/aarch64/aarch64-cores.def: New file. * config/aarch64/aarch64-elf-raw.h: New file. * config/aarch64/aarch64-elf.h: New file. * config/aarch64/aarch64-generic.md: New file. * config/aarch64/aarch64-linux.h: New file. * config/aarch64/aarch64-modes.def: New file. * config/aarch64/aarch64-option-extensions.def: New file. * config/aarch64/aarch64-opts.h: New file. * config/aarch64/aarch64-protos.h: New file. * config/aarch64/aarch64-simd.md: New file. * config/aarch64/aarch64-tune.md: New file. * config/aarch64/aarch64.c: New file. * config/aarch64/aarch64.h: New file. * config/aarch64/aarch64.md: New file. * config/aarch64/aarch64.opt: New file. * config/aarch64/arm_neon.h: New file. * config/aarch64/constraints.md: New file. * config/aarch64/gentune.sh: New file. * config/aarch64/iterators.md: New file. * config/aarch64/large.md: New file. * config/aarch64/predicates.md: New file. * config/aarch64/small.md: New file. * config/aarch64/sync.md: New file. * config/aarch64/t-aarch64-linux: New file. * config/aarch64/t-aarch64: New file. Co-Authored-By: Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com> Co-Authored-By: Jim MacArthur <jim.macarthur@arm.com> Co-Authored-By: Marcus Shawcroft <marcus.shawcroft@arm.com> Co-Authored-By: Nigel Stephens <nigel.stephens@arm.com> Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Co-Authored-By: Richard Earnshaw <rearnsha@arm.com> Co-Authored-By: Sofiane Naci <sofiane.naci@arm.com> Co-Authored-By: Stephen Thomas <stephen.thomas@arm.com> Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com> Co-Authored-By: Yufeng Zhang <yufeng.zhang@arm.com> From-SVN: r192723
2012-10-23 19:02:30 +02:00
aarch64: Add machine modes for Neon vector-tuple types Until now, GCC has used large integer machine modes (OI, CI and XI) to model Neon vector-tuple types. This is suboptimal for many reasons, the most notable are: 1) Large integer modes are opaque and modifying one vector in the tuple requires a lot of inefficient set/get gymnastics. The result is a lot of superfluous move instructions. 2) Large integer modes do not map well to types that are tuples of 64-bit vectors - we need additional zero-padding which again results in superfluous move instructions. This patch adds new machine modes that better model the C-level Neon vector-tuple types. The approach is somewhat similar to that already used for SVE vector-tuple types. All of the AArch64 backend patterns and builtins that manipulate Neon vector tuples are updated to use the new machine modes. This has the effect of significantly reducing the amount of boiler-plate code in the arm_neon.h header. While this patch increases the quality of code generated in many instances, there is still room for significant improvement - which will be attempted in subsequent patches. gcc/ChangeLog: 2021-08-09 Jonathan Wright <jonathan.wright@arm.com> Richard Sandiford <richard.sandiford@arm.com> * config/aarch64/aarch64-builtins.c (v2x8qi_UP): Define. (v2x4hi_UP): Likewise. (v2x4hf_UP): Likewise. (v2x4bf_UP): Likewise. (v2x2si_UP): Likewise. (v2x2sf_UP): Likewise. (v2x1di_UP): Likewise. (v2x1df_UP): Likewise. (v2x16qi_UP): Likewise. (v2x8hi_UP): Likewise. (v2x8hf_UP): Likewise. (v2x8bf_UP): Likewise. (v2x4si_UP): Likewise. (v2x4sf_UP): Likewise. (v2x2di_UP): Likewise. (v2x2df_UP): Likewise. (v3x8qi_UP): Likewise. (v3x4hi_UP): Likewise. (v3x4hf_UP): Likewise. (v3x4bf_UP): Likewise. (v3x2si_UP): Likewise. (v3x2sf_UP): Likewise. (v3x1di_UP): Likewise. (v3x1df_UP): Likewise. (v3x16qi_UP): Likewise. (v3x8hi_UP): Likewise. (v3x8hf_UP): Likewise. (v3x8bf_UP): Likewise. (v3x4si_UP): Likewise. (v3x4sf_UP): Likewise. (v3x2di_UP): Likewise. (v3x2df_UP): Likewise. (v4x8qi_UP): Likewise. (v4x4hi_UP): Likewise. (v4x4hf_UP): Likewise. (v4x4bf_UP): Likewise. (v4x2si_UP): Likewise. (v4x2sf_UP): Likewise. (v4x1di_UP): Likewise. (v4x1df_UP): Likewise. (v4x16qi_UP): Likewise. (v4x8hi_UP): Likewise. (v4x8hf_UP): Likewise. (v4x8bf_UP): Likewise. (v4x4si_UP): Likewise. (v4x4sf_UP): Likewise. (v4x2di_UP): Likewise. (v4x2df_UP): Likewise. (TYPES_GETREGP): Delete. (TYPES_SETREGP): Likewise. (TYPES_LOADSTRUCT_U): Define. (TYPES_LOADSTRUCT_P): Likewise. (TYPES_LOADSTRUCT_LANE_U): Likewise. (TYPES_LOADSTRUCT_LANE_P): Likewise. (TYPES_STORE1P): Move for consistency. (TYPES_STORESTRUCT_U): Define. (TYPES_STORESTRUCT_P): Likewise. (TYPES_STORESTRUCT_LANE_U): Likewise. (TYPES_STORESTRUCT_LANE_P): Likewise. (aarch64_simd_tuple_types): Define. (aarch64_lookup_simd_builtin_type): Handle tuple type lookup. (aarch64_init_simd_builtin_functions): Update frontend lookup for builtin functions after handling arm_neon.h pragma. (register_tuple_type): Manually set modes of single-integer tuple types. Record tuple types. * config/aarch64/aarch64-modes.def (ADV_SIMD_D_REG_STRUCT_MODES): Define D-register tuple modes. (ADV_SIMD_Q_REG_STRUCT_MODES): Define Q-register tuple modes. (SVE_MODES): Give single-vector modes priority over vector- tuple modes. (VECTOR_MODES_WITH_PREFIX): Set partial-vector mode order to be after all single-vector modes. * config/aarch64/aarch64-simd-builtins.def: Update builtin generator macros to reflect modifications to the backend patterns. 
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2<vstruct_elt>): This. (aarch64_simd_ld2r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2r<vstruct_elt>): This. (aarch64_vec_load_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_load_lanes<mode>_lane<vstruct_elt>): This. (vec_load_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanes<mode><vstruct_elt>): This. (aarch64_simd_st2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st2<vstruct_elt>): This. (aarch64_vec_store_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_store_lanes<mode>_lane<vstruct_elt>): This. (vec_store_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanes<mode><vstruct_elt>): This. (aarch64_simd_ld3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3<vstruct_elt>): This. (aarch64_simd_ld3r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3r<vstruct_elt>): This. (aarch64_vec_load_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesci<mode>): This. (aarch64_simd_st3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st3<vstruct_elt>): This. (aarch64_vec_store_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesci<mode>): This. (aarch64_simd_ld4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4<vstruct_elt>): This. (aarch64_simd_ld4r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4r<vstruct_elt>): This. (aarch64_vec_load_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesxi<mode>): This. (aarch64_simd_st4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st4<vstruct_elt>): This. (aarch64_vec_store_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesxi<mode>): This. (mov<mode>): Define for Neon vector-tuple modes. (aarch64_ld1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x3<vstruct_elt>): This. (aarch64_ld1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x3_<vstruct_elt>): This. (aarch64_ld1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x4<vstruct_elt>): This. (aarch64_ld1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x4_<vstruct_elt>): This. (aarch64_st1x2<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x2<vstruct_elt>): This. (aarch64_st1_x2_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x2_<vstruct_elt>): This. (aarch64_st1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x3<vstruct_elt>): This. (aarch64_st1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x3_<vstruct_elt>): This. (aarch64_st1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x4<vstruct_elt>): This. (aarch64_st1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x4_<vstruct_elt>): This. (*aarch64_mov<mode>): Define for vector-tuple modes. (*aarch64_be_mov<mode>): Likewise. (aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs>r<vstruct_elt>): This. (aarch64_ld2<mode>_dreg): Use vector-tuple mode iterator and rename to... 
(aarch64_ld2<vstruct_elt>_dreg): This. (aarch64_ld3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld3<vstruct_elt>_dreg): This. (aarch64_ld4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld4<vstruct_elt>_dreg): This. (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs><vstruct_elt>): Use vector-tuple mode iterator and rename to... (aarch64_ld<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode (aarch64_ld1x2<VQ:mode>): Delete. (aarch64_ld1x2<VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x2<vstruct_elt>): This. (aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_ld<nregs>_lane<vstruct_elt>): This. (aarch64_get_dreg<VSTRUCT:mode><VDC:mode>): Delete. (aarch64_get_qreg<VSTRUCT:mode><VQ:mode>): Likewise. (aarch64_st2<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st2<vstruct_elt>_dreg): This. (aarch64_st3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st3<vstruct_elt>_dreg): This. (aarch64_st4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st4<vstruct_elt>_dreg): This. (aarch64_st<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st<nregs><vstruct_elt>): This. (aarch64_st<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode iterator and rename to aarch64_st<nregs><vstruct_elt>. (aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_st<nregs>_lane<vstruct_elt>): This. (aarch64_set_qreg<VSTRUCT:mode><VQ:mode>): Delete. (aarch64_simd_ld1<mode>_x2): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld1<vstruct_elt>_x2): This. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Refactor to include new vector-tuple modes. (aarch64_classify_vector_mode): Add cases for new vector- tuple modes. (aarch64_advsimd_partial_struct_mode_p): Define. (aarch64_advsimd_full_struct_mode_p): Likewise. (aarch64_advsimd_vector_array_mode): Likewise. (aarch64_sve_data_mode): Change location in file. (aarch64_array_mode): Handle case of Neon vector-tuple modes. (aarch64_hard_regno_nregs): Handle case of partial Neon vector structures. (aarch64_classify_address): Refactor to include handling of Neon vector-tuple modes. (aarch64_print_operand): Print "d" for "%R" for a partial Neon vector structure. (aarch64_expand_vec_perm_1): Use new vector-tuple mode. (aarch64_modes_tieable_p): Prevent tieing Neon partial struct modes with scalar machines modes larger than 8 bytes. (aarch64_can_change_mode_class): Don't allow changes between partial and full Neon vector-structure modes. * config/aarch64/arm_neon.h (vst2_lane_f16): Use updated builtin and remove boiler-plate code for opaque mode. (vst2_lane_f32): Likewise. (vst2_lane_f64): Likewise. (vst2_lane_p8): Likewise. (vst2_lane_p16): Likewise. (vst2_lane_p64): Likewise. (vst2_lane_s8): Likewise. (vst2_lane_s16): Likewise. (vst2_lane_s32): Likewise. (vst2_lane_s64): Likewise. (vst2_lane_u8): Likewise. (vst2_lane_u16): Likewise. (vst2_lane_u32): Likewise. (vst2_lane_u64): Likewise. (vst2q_lane_f16): Likewise. (vst2q_lane_f32): Likewise. (vst2q_lane_f64): Likewise. (vst2q_lane_p8): Likewise. (vst2q_lane_p16): Likewise. (vst2q_lane_p64): Likewise. (vst2q_lane_s8): Likewise. (vst2q_lane_s16): Likewise. (vst2q_lane_s32): Likewise. (vst2q_lane_s64): Likewise. (vst2q_lane_u8): Likewise. (vst2q_lane_u16): Likewise. (vst2q_lane_u32): Likewise. 
(vst2q_lane_u64): Likewise. (vst3_lane_f16): Likewise. (vst3_lane_f32): Likewise. (vst3_lane_f64): Likewise. (vst3_lane_p8): Likewise. (vst3_lane_p16): Likewise. (vst3_lane_p64): Likewise. (vst3_lane_s8): Likewise. (vst3_lane_s16): Likewise. (vst3_lane_s32): Likewise. (vst3_lane_s64): Likewise. (vst3_lane_u8): Likewise. (vst3_lane_u16): Likewise. (vst3_lane_u32): Likewise. (vst3_lane_u64): Likewise. (vst3q_lane_f16): Likewise. (vst3q_lane_f32): Likewise. (vst3q_lane_f64): Likewise. (vst3q_lane_p8): Likewise. (vst3q_lane_p16): Likewise. (vst3q_lane_p64): Likewise. (vst3q_lane_s8): Likewise. (vst3q_lane_s16): Likewise. (vst3q_lane_s32): Likewise. (vst3q_lane_s64): Likewise. (vst3q_lane_u8): Likewise. (vst3q_lane_u16): Likewise. (vst3q_lane_u32): Likewise. (vst3q_lane_u64): Likewise. (vst4_lane_f16): Likewise. (vst4_lane_f32): Likewise. (vst4_lane_f64): Likewise. (vst4_lane_p8): Likewise. (vst4_lane_p16): Likewise. (vst4_lane_p64): Likewise. (vst4_lane_s8): Likewise. (vst4_lane_s16): Likewise. (vst4_lane_s32): Likewise. (vst4_lane_s64): Likewise. (vst4_lane_u8): Likewise. (vst4_lane_u16): Likewise. (vst4_lane_u32): Likewise. (vst4_lane_u64): Likewise. (vst4q_lane_f16): Likewise. (vst4q_lane_f32): Likewise. (vst4q_lane_f64): Likewise. (vst4q_lane_p8): Likewise. (vst4q_lane_p16): Likewise. (vst4q_lane_p64): Likewise. (vst4q_lane_s8): Likewise. (vst4q_lane_s16): Likewise. (vst4q_lane_s32): Likewise. (vst4q_lane_s64): Likewise. (vst4q_lane_u8): Likewise. (vst4q_lane_u16): Likewise. (vst4q_lane_u32): Likewise. (vst4q_lane_u64): Likewise. (vtbl3_s8): Likewise. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vld1_u8_x3): Likewise. (vld1_s8_x3): Likewise. (vld1_u16_x3): Likewise. (vld1_s16_x3): Likewise. (vld1_u32_x3): Likewise. (vld1_s32_x3): Likewise. (vld1_u64_x3): Likewise. (vld1_s64_x3): Likewise. (vld1_f16_x3): Likewise. (vld1_f32_x3): Likewise. (vld1_f64_x3): Likewise. (vld1_p8_x3): Likewise. (vld1_p16_x3): Likewise. (vld1_p64_x3): Likewise. (vld1q_u8_x3): Likewise. (vld1q_s8_x3): Likewise. (vld1q_u16_x3): Likewise. (vld1q_s16_x3): Likewise. (vld1q_u32_x3): Likewise. (vld1q_s32_x3): Likewise. (vld1q_u64_x3): Likewise. (vld1q_s64_x3): Likewise. (vld1q_f16_x3): Likewise. (vld1q_f32_x3): Likewise. (vld1q_f64_x3): Likewise. (vld1q_p8_x3): Likewise. (vld1q_p16_x3): Likewise. (vld1q_p64_x3): Likewise. (vld1_u8_x2): Likewise. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. (vld1_s8_x4): Likewise. (vld1q_s8_x4): Likewise. (vld1_s16_x4): Likewise. (vld1q_s16_x4): Likewise. (vld1_s32_x4): Likewise. (vld1q_s32_x4): Likewise. (vld1_u8_x4): Likewise. (vld1q_u8_x4): Likewise. (vld1_u16_x4): Likewise. (vld1q_u16_x4): Likewise. (vld1_u32_x4): Likewise. (vld1q_u32_x4): Likewise. (vld1_f16_x4): Likewise. (vld1q_f16_x4): Likewise. (vld1_f32_x4): Likewise. (vld1q_f32_x4): Likewise. (vld1_p8_x4): Likewise. 
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
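
;; LD4 on tuples of four 64-bit (D-register) vectors.  For the modes in
;; VSTRUCT_4DNX each vector holds more than one element, so a single
;; LD4 (multiple structures) both loads the four D registers and
;; de-interleaves the elements across them.  For example (illustrative
;; register choice: a V4x8QI destination in v0-v3, address in x0), the
;; template below emits:
;;
;;	ld4	{v0.8b - v3.8b}, [x0]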
(define_insn "aarch64_ld4<vstruct_elt>_dreg"
[(set (match_operand:VSTRUCT_4DNX 0 "register_operand" "=w")
(unspec:VSTRUCT_4DNX [
(match_operand:VSTRUCT_4DNX 1 "aarch64_simd_struct_operand" "Utv")]
UNSPEC_LD4_DREG))]
"TARGET_SIMD"
"ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
[(set_attr "type" "neon_load4_4reg<q>")]
)
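
;; The VSTRUCT_4DX modes are tuples of four single-element 64-bit
;; vectors (DI/DF elements).  LD4 (multiple structures) has no 1D
;; arrangement, and with one element per vector there is nothing to
;; de-interleave, so a contiguous LD1 of four registers is used
;; instead (again with illustrative registers v0-v3 and x0):
;;
;;	ld1	{v0.1d - v3.1d}, [x0]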
(define_insn "aarch64_ld4<vstruct_elt>_dreg"
[(set (match_operand:VSTRUCT_4DX 0 "register_operand" "=w")
(unspec:VSTRUCT_4DX [
(match_operand:VSTRUCT_4DX 1 "aarch64_simd_struct_operand" "Utv")]
UNSPEC_LD4_DREG))]
"TARGET_SIMD"
"ld1\\t{%S0.1d - %V0.1d}, %1"
[(set_attr "type" "neon_load1_4reg<q>")]
)
2021-08-09 16:26:48 +02:00
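;; Expander for the LD2/LD3/LD4 structure-load builtins on tuples of
;; 64-bit vectors (the VSTRUCT_D modes).  Operand 1 holds the base
;; address; matching of the actual ld<nregs> instruction is left to the
;; corresponding _dreg pattern.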
(define_expand "aarch64_ld<nregs><vstruct_elt>"
[(match_operand:VSTRUCT_D 0 "register_operand")
(match_operand:DI 1 "register_operand")]
"TARGET_SIMD"
{
  /* Wrap the incoming address in a MEM of the vector-tuple mode and
     let the _dreg pattern do the rest.  */
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
  emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[0], mem));
  DONE;
})
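
;; As a rough illustration (a sketch, not part of this file), a vld2
;; intrinsic reaches the expander above through arm_neon.h and the
;; builtin machinery; the helper name below is hypothetical:
;;
;;   #include <arm_neon.h>
;;
;;   int32x2x2_t
;;   load_pair (const int32_t *p)
;;   {
;;     /* Maps to __builtin_aarch64_ld2v2si and should emit something
;;        like "ld2 {v0.2s - v1.2s}, [x0]".  */
;;     return vld2_s32 (p);
;;   }

;; Expander for the whole-vector LD1 builtins.  A single-vector LD1 is
;; just an ordinary vector-sized load, so on little-endian targets it
;; degenerates to a plain move; big-endian targets need the dedicated
;; BE pattern so that the in-register lane order matches the
;; architectural LD1 semantics.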
(define_expand "aarch64_ld1<VALL_F16:mode>"
[(match_operand:VALL_F16 0 "register_operand")
(match_operand:DI 1 "register_operand")]
"TARGET_SIMD"
{
machine_mode mode = <VALL_F16:MODE>mode;
rtx mem = gen_rtx_MEM (mode, operands[1]);
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
else
emit_move_insn (operands[0], mem);
DONE;
})
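
;; Again as a hedged sketch (hypothetical helper name), the user-level
;; counterpart of the expander above:
;;
;;   #include <arm_neon.h>
;;
;;   int32x2_t
;;   load_one (const int32_t *p)
;;   {
;;     /* Maps to __builtin_aarch64_ld1v2si; on little-endian this is
;;        a single "ldr d0, [x0]".  */
;;     return vld1_s32 (p);
;;   }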
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
(define_expand "aarch64_ld<nregs><vstruct_elt>"
[(match_operand:VSTRUCT_Q 0 "register_operand")
(match_operand:DI 1 "register_operand")]
"TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
  emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[0], mem));
  DONE;
})
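
;; A sketch of how this expander is typically reached (the builtin and
;; intrinsic names below follow the usual arm_neon.h plumbing and are
;; stated as assumptions, not taken from this file): for V2x16QImode,
;; vld2q_s8 calls __builtin_aarch64_ld2v16qi, which enters this
;; expander and in turn emits the aarch64_simd_ld2v16qi insn, i.e. a
;; single
;;   ld2 {v0.16b - v1.16b}, [x0]
;; loading both Q registers of the tuple in one instruction.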
2021-08-09 16:26:48 +02:00
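;; Expand the vld1*_x2 intrinsics: load two consecutive vectors from the
;; address in operand 1 into the vector-tuple register in operand 0.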
(define_expand "aarch64_ld1x2<vstruct_elt>"
  [(match_operand:VSTRUCT_2QD 0 "register_operand")
   (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);
  emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[0], mem));
  DONE;
})
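
;; Load one lane of each vector in a register tuple (the expander behind
;; the vld2/3/4_lane intrinsics).  Operand 0 is the destination tuple,
;; operand 1 the address to load from, operand 2 the source tuple whose
;; contents are kept in every lane other than the one loaded, and
;; operand 3 the lane index.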
(define_expand "aarch64_ld<nregs>_lane<vstruct_elt>"
  [(match_operand:VSTRUCT_QD 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:VSTRUCT_QD 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
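  /* The MEM is BLKmode; record the true access size below: one
     inner-mode element per constituent vector of the tuple, <nregs>
     elements in total.  */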
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
AArch64 [3/10] 2012-10-23 Ian Bolton <ian.bolton@arm.com> James Greenhalgh <james.greenhalgh@arm.com> Jim MacArthur <jim.macarthur@arm.com> Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Marcus Shawcroft <marcus.shawcroft@arm.com> Nigel Stephens <nigel.stephens@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Richard Earnshaw <rearnsha@arm.com> Sofiane Naci <sofiane.naci@arm.com> Stephen Thomas <stephen.thomas@arm.com> Tejas Belagod <tejas.belagod@arm.com> Yufeng Zhang <yufeng.zhang@arm.com> * common/config/aarch64/aarch64-common.c: New file. * config/aarch64/aarch64-arches.def: New file. * config/aarch64/aarch64-builtins.c: New file. * config/aarch64/aarch64-cores.def: New file. * config/aarch64/aarch64-elf-raw.h: New file. * config/aarch64/aarch64-elf.h: New file. * config/aarch64/aarch64-generic.md: New file. * config/aarch64/aarch64-linux.h: New file. * config/aarch64/aarch64-modes.def: New file. * config/aarch64/aarch64-option-extensions.def: New file. * config/aarch64/aarch64-opts.h: New file. * config/aarch64/aarch64-protos.h: New file. * config/aarch64/aarch64-simd.md: New file. * config/aarch64/aarch64-tune.md: New file. * config/aarch64/aarch64.c: New file. * config/aarch64/aarch64.h: New file. * config/aarch64/aarch64.md: New file. * config/aarch64/aarch64.opt: New file. * config/aarch64/arm_neon.h: New file. * config/aarch64/constraints.md: New file. * config/aarch64/gentune.sh: New file. * config/aarch64/iterators.md: New file. * config/aarch64/large.md: New file. * config/aarch64/predicates.md: New file. * config/aarch64/small.md: New file. * config/aarch64/sync.md: New file. * config/aarch64/t-aarch64-linux: New file. * config/aarch64/t-aarch64: New file. Co-Authored-By: Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com> Co-Authored-By: Jim MacArthur <jim.macarthur@arm.com> Co-Authored-By: Marcus Shawcroft <marcus.shawcroft@arm.com> Co-Authored-By: Nigel Stephens <nigel.stephens@arm.com> Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Co-Authored-By: Richard Earnshaw <rearnsha@arm.com> Co-Authored-By: Sofiane Naci <sofiane.naci@arm.com> Co-Authored-By: Stephen Thomas <stephen.thomas@arm.com> Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com> Co-Authored-By: Yufeng Zhang <yufeng.zhang@arm.com> From-SVN: r192723
2012-10-23 19:02:30 +02:00
aarch64: Add machine modes for Neon vector-tuple types Until now, GCC has used large integer machine modes (OI, CI and XI) to model Neon vector-tuple types. This is suboptimal for many reasons, the most notable are: 1) Large integer modes are opaque and modifying one vector in the tuple requires a lot of inefficient set/get gymnastics. The result is a lot of superfluous move instructions. 2) Large integer modes do not map well to types that are tuples of 64-bit vectors - we need additional zero-padding which again results in superfluous move instructions. This patch adds new machine modes that better model the C-level Neon vector-tuple types. The approach is somewhat similar to that already used for SVE vector-tuple types. All of the AArch64 backend patterns and builtins that manipulate Neon vector tuples are updated to use the new machine modes. This has the effect of significantly reducing the amount of boiler-plate code in the arm_neon.h header. While this patch increases the quality of code generated in many instances, there is still room for significant improvement - which will be attempted in subsequent patches. gcc/ChangeLog: 2021-08-09 Jonathan Wright <jonathan.wright@arm.com> Richard Sandiford <richard.sandiford@arm.com> * config/aarch64/aarch64-builtins.c (v2x8qi_UP): Define. (v2x4hi_UP): Likewise. (v2x4hf_UP): Likewise. (v2x4bf_UP): Likewise. (v2x2si_UP): Likewise. (v2x2sf_UP): Likewise. (v2x1di_UP): Likewise. (v2x1df_UP): Likewise. (v2x16qi_UP): Likewise. (v2x8hi_UP): Likewise. (v2x8hf_UP): Likewise. (v2x8bf_UP): Likewise. (v2x4si_UP): Likewise. (v2x4sf_UP): Likewise. (v2x2di_UP): Likewise. (v2x2df_UP): Likewise. (v3x8qi_UP): Likewise. (v3x4hi_UP): Likewise. (v3x4hf_UP): Likewise. (v3x4bf_UP): Likewise. (v3x2si_UP): Likewise. (v3x2sf_UP): Likewise. (v3x1di_UP): Likewise. (v3x1df_UP): Likewise. (v3x16qi_UP): Likewise. (v3x8hi_UP): Likewise. (v3x8hf_UP): Likewise. (v3x8bf_UP): Likewise. (v3x4si_UP): Likewise. (v3x4sf_UP): Likewise. (v3x2di_UP): Likewise. (v3x2df_UP): Likewise. (v4x8qi_UP): Likewise. (v4x4hi_UP): Likewise. (v4x4hf_UP): Likewise. (v4x4bf_UP): Likewise. (v4x2si_UP): Likewise. (v4x2sf_UP): Likewise. (v4x1di_UP): Likewise. (v4x1df_UP): Likewise. (v4x16qi_UP): Likewise. (v4x8hi_UP): Likewise. (v4x8hf_UP): Likewise. (v4x8bf_UP): Likewise. (v4x4si_UP): Likewise. (v4x4sf_UP): Likewise. (v4x2di_UP): Likewise. (v4x2df_UP): Likewise. (TYPES_GETREGP): Delete. (TYPES_SETREGP): Likewise. (TYPES_LOADSTRUCT_U): Define. (TYPES_LOADSTRUCT_P): Likewise. (TYPES_LOADSTRUCT_LANE_U): Likewise. (TYPES_LOADSTRUCT_LANE_P): Likewise. (TYPES_STORE1P): Move for consistency. (TYPES_STORESTRUCT_U): Define. (TYPES_STORESTRUCT_P): Likewise. (TYPES_STORESTRUCT_LANE_U): Likewise. (TYPES_STORESTRUCT_LANE_P): Likewise. (aarch64_simd_tuple_types): Define. (aarch64_lookup_simd_builtin_type): Handle tuple type lookup. (aarch64_init_simd_builtin_functions): Update frontend lookup for builtin functions after handling arm_neon.h pragma. (register_tuple_type): Manually set modes of single-integer tuple types. Record tuple types. * config/aarch64/aarch64-modes.def (ADV_SIMD_D_REG_STRUCT_MODES): Define D-register tuple modes. (ADV_SIMD_Q_REG_STRUCT_MODES): Define Q-register tuple modes. (SVE_MODES): Give single-vector modes priority over vector- tuple modes. (VECTOR_MODES_WITH_PREFIX): Set partial-vector mode order to be after all single-vector modes. * config/aarch64/aarch64-simd-builtins.def: Update builtin generator macros to reflect modifications to the backend patterns. 
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2<vstruct_elt>): This. (aarch64_simd_ld2r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2r<vstruct_elt>): This. (aarch64_vec_load_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_load_lanes<mode>_lane<vstruct_elt>): This. (vec_load_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanes<mode><vstruct_elt>): This. (aarch64_simd_st2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st2<vstruct_elt>): This. (aarch64_vec_store_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_store_lanes<mode>_lane<vstruct_elt>): This. (vec_store_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanes<mode><vstruct_elt>): This. (aarch64_simd_ld3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3<vstruct_elt>): This. (aarch64_simd_ld3r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3r<vstruct_elt>): This. (aarch64_vec_load_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesci<mode>): This. (aarch64_simd_st3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st3<vstruct_elt>): This. (aarch64_vec_store_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesci<mode>): This. (aarch64_simd_ld4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4<vstruct_elt>): This. (aarch64_simd_ld4r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4r<vstruct_elt>): This. (aarch64_vec_load_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesxi<mode>): This. (aarch64_simd_st4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st4<vstruct_elt>): This. (aarch64_vec_store_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesxi<mode>): This. (mov<mode>): Define for Neon vector-tuple modes. (aarch64_ld1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x3<vstruct_elt>): This. (aarch64_ld1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x3_<vstruct_elt>): This. (aarch64_ld1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x4<vstruct_elt>): This. (aarch64_ld1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x4_<vstruct_elt>): This. (aarch64_st1x2<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x2<vstruct_elt>): This. (aarch64_st1_x2_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x2_<vstruct_elt>): This. (aarch64_st1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x3<vstruct_elt>): This. (aarch64_st1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x3_<vstruct_elt>): This. (aarch64_st1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x4<vstruct_elt>): This. (aarch64_st1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x4_<vstruct_elt>): This. (*aarch64_mov<mode>): Define for vector-tuple modes. (*aarch64_be_mov<mode>): Likewise. (aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs>r<vstruct_elt>): This. (aarch64_ld2<mode>_dreg): Use vector-tuple mode iterator and rename to... 
(aarch64_ld2<vstruct_elt>_dreg): This. (aarch64_ld3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld3<vstruct_elt>_dreg): This. (aarch64_ld4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld4<vstruct_elt>_dreg): This. (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs><vstruct_elt>): Use vector-tuple mode iterator and rename to... (aarch64_ld<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode (aarch64_ld1x2<VQ:mode>): Delete. (aarch64_ld1x2<VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x2<vstruct_elt>): This. (aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_ld<nregs>_lane<vstruct_elt>): This. (aarch64_get_dreg<VSTRUCT:mode><VDC:mode>): Delete. (aarch64_get_qreg<VSTRUCT:mode><VQ:mode>): Likewise. (aarch64_st2<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st2<vstruct_elt>_dreg): This. (aarch64_st3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st3<vstruct_elt>_dreg): This. (aarch64_st4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st4<vstruct_elt>_dreg): This. (aarch64_st<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st<nregs><vstruct_elt>): This. (aarch64_st<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode iterator and rename to aarch64_st<nregs><vstruct_elt>. (aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_st<nregs>_lane<vstruct_elt>): This. (aarch64_set_qreg<VSTRUCT:mode><VQ:mode>): Delete. (aarch64_simd_ld1<mode>_x2): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld1<vstruct_elt>_x2): This. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Refactor to include new vector-tuple modes. (aarch64_classify_vector_mode): Add cases for new vector- tuple modes. (aarch64_advsimd_partial_struct_mode_p): Define. (aarch64_advsimd_full_struct_mode_p): Likewise. (aarch64_advsimd_vector_array_mode): Likewise. (aarch64_sve_data_mode): Change location in file. (aarch64_array_mode): Handle case of Neon vector-tuple modes. (aarch64_hard_regno_nregs): Handle case of partial Neon vector structures. (aarch64_classify_address): Refactor to include handling of Neon vector-tuple modes. (aarch64_print_operand): Print "d" for "%R" for a partial Neon vector structure. (aarch64_expand_vec_perm_1): Use new vector-tuple mode. (aarch64_modes_tieable_p): Prevent tieing Neon partial struct modes with scalar machines modes larger than 8 bytes. (aarch64_can_change_mode_class): Don't allow changes between partial and full Neon vector-structure modes. * config/aarch64/arm_neon.h (vst2_lane_f16): Use updated builtin and remove boiler-plate code for opaque mode. (vst2_lane_f32): Likewise. (vst2_lane_f64): Likewise. (vst2_lane_p8): Likewise. (vst2_lane_p16): Likewise. (vst2_lane_p64): Likewise. (vst2_lane_s8): Likewise. (vst2_lane_s16): Likewise. (vst2_lane_s32): Likewise. (vst2_lane_s64): Likewise. (vst2_lane_u8): Likewise. (vst2_lane_u16): Likewise. (vst2_lane_u32): Likewise. (vst2_lane_u64): Likewise. (vst2q_lane_f16): Likewise. (vst2q_lane_f32): Likewise. (vst2q_lane_f64): Likewise. (vst2q_lane_p8): Likewise. (vst2q_lane_p16): Likewise. (vst2q_lane_p64): Likewise. (vst2q_lane_s8): Likewise. (vst2q_lane_s16): Likewise. (vst2q_lane_s32): Likewise. (vst2q_lane_s64): Likewise. (vst2q_lane_u8): Likewise. (vst2q_lane_u16): Likewise. (vst2q_lane_u32): Likewise. 
(vst2q_lane_u64): Likewise. (vst3_lane_f16): Likewise. (vst3_lane_f32): Likewise. (vst3_lane_f64): Likewise. (vst3_lane_p8): Likewise. (vst3_lane_p16): Likewise. (vst3_lane_p64): Likewise. (vst3_lane_s8): Likewise. (vst3_lane_s16): Likewise. (vst3_lane_s32): Likewise. (vst3_lane_s64): Likewise. (vst3_lane_u8): Likewise. (vst3_lane_u16): Likewise. (vst3_lane_u32): Likewise. (vst3_lane_u64): Likewise. (vst3q_lane_f16): Likewise. (vst3q_lane_f32): Likewise. (vst3q_lane_f64): Likewise. (vst3q_lane_p8): Likewise. (vst3q_lane_p16): Likewise. (vst3q_lane_p64): Likewise. (vst3q_lane_s8): Likewise. (vst3q_lane_s16): Likewise. (vst3q_lane_s32): Likewise. (vst3q_lane_s64): Likewise. (vst3q_lane_u8): Likewise. (vst3q_lane_u16): Likewise. (vst3q_lane_u32): Likewise. (vst3q_lane_u64): Likewise. (vst4_lane_f16): Likewise. (vst4_lane_f32): Likewise. (vst4_lane_f64): Likewise. (vst4_lane_p8): Likewise. (vst4_lane_p16): Likewise. (vst4_lane_p64): Likewise. (vst4_lane_s8): Likewise. (vst4_lane_s16): Likewise. (vst4_lane_s32): Likewise. (vst4_lane_s64): Likewise. (vst4_lane_u8): Likewise. (vst4_lane_u16): Likewise. (vst4_lane_u32): Likewise. (vst4_lane_u64): Likewise. (vst4q_lane_f16): Likewise. (vst4q_lane_f32): Likewise. (vst4q_lane_f64): Likewise. (vst4q_lane_p8): Likewise. (vst4q_lane_p16): Likewise. (vst4q_lane_p64): Likewise. (vst4q_lane_s8): Likewise. (vst4q_lane_s16): Likewise. (vst4q_lane_s32): Likewise. (vst4q_lane_s64): Likewise. (vst4q_lane_u8): Likewise. (vst4q_lane_u16): Likewise. (vst4q_lane_u32): Likewise. (vst4q_lane_u64): Likewise. (vtbl3_s8): Likewise. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vld1_u8_x3): Likewise. (vld1_s8_x3): Likewise. (vld1_u16_x3): Likewise. (vld1_s16_x3): Likewise. (vld1_u32_x3): Likewise. (vld1_s32_x3): Likewise. (vld1_u64_x3): Likewise. (vld1_s64_x3): Likewise. (vld1_f16_x3): Likewise. (vld1_f32_x3): Likewise. (vld1_f64_x3): Likewise. (vld1_p8_x3): Likewise. (vld1_p16_x3): Likewise. (vld1_p64_x3): Likewise. (vld1q_u8_x3): Likewise. (vld1q_s8_x3): Likewise. (vld1q_u16_x3): Likewise. (vld1q_s16_x3): Likewise. (vld1q_u32_x3): Likewise. (vld1q_s32_x3): Likewise. (vld1q_u64_x3): Likewise. (vld1q_s64_x3): Likewise. (vld1q_f16_x3): Likewise. (vld1q_f32_x3): Likewise. (vld1q_f64_x3): Likewise. (vld1q_p8_x3): Likewise. (vld1q_p16_x3): Likewise. (vld1q_p64_x3): Likewise. (vld1_u8_x2): Likewise. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. (vld1_s8_x4): Likewise. (vld1q_s8_x4): Likewise. (vld1_s16_x4): Likewise. (vld1q_s16_x4): Likewise. (vld1_s32_x4): Likewise. (vld1q_s32_x4): Likewise. (vld1_u8_x4): Likewise. (vld1q_u8_x4): Likewise. (vld1_u16_x4): Likewise. (vld1q_u16_x4): Likewise. (vld1_u32_x4): Likewise. (vld1q_u32_x4): Likewise. (vld1_f16_x4): Likewise. (vld1q_f16_x4): Likewise. (vld1_f32_x4): Likewise. (vld1q_f32_x4): Likewise. (vld1_p8_x4): Likewise. 
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
  aarch64_simd_lane_bounds (operands[3], 0,
			    GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
  emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[0],
			    mem, operands[2], operands[3]));
DONE;
})
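
;; Worked example of the lane-bounds arithmetic above (illustrative, not
;; from this file): for an ld2-lane expansion on .4s data the tuple mode
;; is V2x4SI, so GET_MODE_NUNITS is 8 and <nregs> is 2, leaving valid
;; lane indices 0-3, one per 32-bit element of a single Q register.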
;; Permuted-store expanders for neon intrinsics.

;; Permute instructions

;; vec_perm support
(define_expand "vec_perm<mode>"
[(match_operand:VB 0 "register_operand")
(match_operand:VB 1 "register_operand")
(match_operand:VB 2 "register_operand")
(match_operand:VB 3 "register_operand")]
"TARGET_SIMD"
{
aarch64_expand_vec_perm (operands[0], operands[1],
operands[2], operands[3], <nunits>);
DONE;
})
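
;; A minimal C-level sketch of what reaches this expander, assuming the
;; GNU vector extensions (type and function names below are illustrative,
;; not from this file):
;;
;;   typedef unsigned char v16u8 __attribute__ ((vector_size (16)));
;;
;;   v16u8
;;   permute (v16u8 a, v16u8 b, v16u8 sel)
;;   {
;;     return __builtin_shuffle (a, b, sel);  /* variable selector  */
;;   }
;;
;; With a non-constant selector, __builtin_shuffle is expected to expand
;; through vec_perm<mode> and end up as a TBL over the two inputs.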
(define_insn "aarch64_qtbl1<mode>"
[(set (match_operand:VB 0 "register_operand" "=w")
(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
(match_operand:VB 2 "register_operand" "w")]
UNSPEC_TBL))]
"TARGET_SIMD"
"tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
[(set_attr "type" "neon_tbl1<q>")]
)
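
;; TBL writes zero to any result byte whose index is out of range for
;; the table.  An intrinsic-level sketch of this single-table form
;; (function name is illustrative):
;;
;;   #include <arm_neon.h>
;;
;;   uint8x16_t
;;   lookup (uint8x16_t tab, uint8x16_t idx)
;;   {
;;     return vqtbl1q_u8 (tab, idx);  /* a single TBL instruction */
;;   }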
(define_insn "aarch64_qtbx1<mode>"
[(set (match_operand:VB 0 "register_operand" "=w")
(unspec:VB [(match_operand:VB 1 "register_operand" "0")
(match_operand:V16QI 2 "register_operand" "w")
(match_operand:VB 3 "register_operand" "w")]
UNSPEC_TBX))]
"TARGET_SIMD"
"tbx\\t%0.<Vtype>, {%2.16b}, %3.<Vtype>"
[(set_attr "type" "neon_tbl1<q>")]
)
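
;; TBX differs from TBL only for out-of-range indices: those destination
;; bytes are left unchanged, which is why operand 1 is tied to the output
;; via the "0" constraint.  Intrinsic-level sketch (illustrative names):
;;
;;   #include <arm_neon.h>
;;
;;   uint8x16_t
;;   lookup_keep (uint8x16_t fallback, uint8x16_t tab, uint8x16_t idx)
;;   {
;;     return vqtbx1q_u8 (fallback, tab, idx);
;;   }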
;; Two source registers.
(define_insn "aarch64_qtbl2<mode>"
[(set (match_operand:VB 0 "register_operand" "=w")
(unspec:VB [(match_operand:V2x16QI 1 "register_operand" "w")
(match_operand:VB 2 "register_operand" "w")]
UNSPEC_TBL))]
"TARGET_SIMD"
"tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
[(set_attr "type" "neon_tbl2")]
)
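
;; V2x16QI is one of the Neon vector-tuple modes; %S1 and %T1 print the
;; first and second Q registers of the pair, which must be consecutive
;; for a multi-register TBL.  Indices 0-31 are in range here.  Sketch
;; (illustrative name):
;;
;;   #include <arm_neon.h>
;;
;;   uint8x16_t
;;   lookup32 (uint8x16x2_t tab, uint8x16_t idx)
;;   {
;;     return vqtbl2q_u8 (tab, idx);
;;   }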
(define_insn "aarch64_qtbx2<mode>"
[(set (match_operand:VB 0 "register_operand" "=w")
(unspec:VB [(match_operand:VB 1 "register_operand" "0")
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
		   (match_operand:V2x16QI 2 "register_operand" "w")
		   (match_operand:VB 3 "register_operand" "w")]
		  UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl2")]
)
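
;; A minimal usage sketch (illustrative, not part of this file): the
;; arm_neon.h intrinsic vqtbx2q_u8 is one entry point expected to expand
;; through the two-register-table TBX pattern above.  Index lanes in the
;; range 0-31 select a byte from the 32-byte table; out-of-range lanes
;; leave the corresponding byte of the fallback vector unchanged.  The
;; wrapper name below is hypothetical.
;;
;;   #include <arm_neon.h>
;;
;;   uint8x16_t
;;   pick_or_keep (uint8x16_t fallback, uint8x16x2_t table, uint8x16_t idx)
;;   {
;;     /* Indices 0-31 pick from TABLE; others keep FALLBACK's byte.  */
;;     return vqtbx2q_u8 (fallback, table, idx);
;;   }
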
;; Three source registers.
(define_insn "aarch64_qtbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:V3x16QI 1 "register_operand" "w")
		   (match_operand:VB 2 "register_operand" "w")]
		  UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)
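
;; A minimal usage sketch (illustrative, not part of this file): the
;; arm_neon.h intrinsic vqtbl3q_u8 is one entry point expected to expand
;; through the three-register-table TBL pattern above.  Index lanes in
;; the range 0-47 select a byte from the 48-byte table; out-of-range
;; lanes produce zero.  The wrapper name below is hypothetical.
;;
;;   #include <arm_neon.h>
;;
;;   uint8x16_t
;;   lookup_48_byte_table (uint8x16x3_t table, uint8x16_t idx)
;;   {
;;     /* Indices 0-47 pick from TABLE; others yield 0 in that lane.  */
;;     return vqtbl3q_u8 (table, idx);
;;   }
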
(define_insn "aarch64_qtbx3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
(match_operand:V3x16QI 2 "register_operand" "w")
(match_operand:VB 3 "register_operand" "w")]
UNSPEC_TBX))]
"TARGET_SIMD"
"tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
[(set_attr "type" "neon_tbl3")]
)
;; Four source registers.
(define_insn "aarch64_qtbl4<mode>"
[(set (match_operand:VB 0 "register_operand" "=w")
(unspec:VB [(match_operand:V4x16QI 1 "register_operand" "w")
(match_operand:VB 2 "register_operand" "w")]
UNSPEC_TBL))]
"TARGET_SIMD"
"tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
[(set_attr "type" "neon_tbl4")]
)
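
;; A minimal usage sketch (illustrative, not part of this file): the
;; qtbl4 pattern above is the kind of single-TBL table lookup that the
;; ACLE vqtbl4q_u8 intrinsic from arm_neon.h compiles down to, with the
;; V4x16QI operand corresponding to the uint8x16x4_t table tuple.
;;
;;   #include <arm_neon.h>
;;
;;   /* Look up 16 bytes in a 64-byte table; any index >= 64 reads
;;      as zero under TBL semantics.  */
;;   uint8x16_t
;;   lookup (uint8x16x4_t table, uint8x16_t idx)
;;   {
;;     return vqtbl4q_u8 (table, idx);
;;   }
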
(define_insn "aarch64_qtbx4<mode>"
[(set (match_operand:VB 0 "register_operand" "=w")
(unspec:VB [(match_operand:VB 1 "register_operand" "0")
	       (match_operand:V4x16QI 2 "register_operand" "w")
	       (match_operand:VB 3 "register_operand" "w")]
	      UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
(define_insn "aarch64_qtbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:V4x16QI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%0.<Vbtype>, {%S2.16b - %V2.16b}, %3.<Vbtype>"
[(set_attr "type" "neon_tbl4")]
)
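;; Illustrative usage, not part of the original file: the vqtbx4q_u8
;; intrinsic from arm_neon.h is one way to reach this pattern.  Unlike TBL,
;; TBX leaves a destination byte unchanged when its index is out of range
;; (here, >= 64 for the four-register table) instead of zeroing it.
;;
;;   #include <arm_neon.h>
;;   uint8x16_t
;;   lookup (uint8x16_t fallback, uint8x16x4_t table, uint8x16_t idx)
;;   {
;;     /* Bytes whose index is >= 64 keep the value from FALLBACK.  */
;;     return vqtbx4q_u8 (fallback, table, idx);
;;   }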
(define_insn_and_split "aarch64_combinev16qi"
[(set (match_operand:V2x16QI 0 "register_operand" "=w")
(unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
(match_operand:V16QI 2 "register_operand" "w")]
UNSPEC_CONCAT))]
"TARGET_SIMD"
"#"
"&& reload_completed"
[(const_int 0)]
{
aarch64_split_combinev16qi (operands);
DONE;
}
[(set_attr "type" "multiple")]
)
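;; A sketch of code that typically takes this route (an illustration, under
;; the assumption that no dedicated permute instruction matches): a
;; two-input V16QI shuffle with a variable selector falls back to a
;; two-register TBL, and this concat glues the inputs into the
;; consecutive-register table operand.
;;
;;   #include <arm_neon.h>
;;   uint8x16_t
;;   shuffle2 (uint8x16_t a, uint8x16_t b, uint8x16_t sel)
;;   {
;;     /* Variable selector: expands to this concat followed by TBL.  */
;;     return __builtin_shuffle (a, b, sel);
;;   }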
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_<PERMUTE:perm_insn><mode>"
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
(match_operand:VALL_F16 2 "register_operand" "w")]
PERMUTE))]
"TARGET_SIMD"
"<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_permute<q>")]
)
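;; Illustrative mapping, not part of the original file: the PERMUTE
;; iterator covers zip1/zip2, uzp1/uzp2 and trn1/trn2, so e.g. the
;; vzip1q_u8 intrinsic, which interleaves the low halves of its operands,
;; selects this pattern.
;;
;;   #include <arm_neon.h>
;;   uint8x16_t
;;   interleave_lo (uint8x16_t a, uint8x16_t b)
;;   {
;;     return vzip1q_u8 (a, b);   /* zip1 v0.16b, v0.16b, v1.16b  */
;;   }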
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there. Note that the immediate (third)
;; operand is a lane index, not a byte index.
(define_insn "aarch64_ext<mode>"
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
(match_operand:VALL_F16 2 "register_operand" "w")
(match_operand:SI 3 "immediate_operand" "i")]
UNSPEC_EXT))]
"TARGET_SIMD"
{
operands[3] = GEN_INT (INTVAL (operands[3])
* GET_MODE_UNIT_SIZE (<MODE>mode));
return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
[(set_attr "type" "neon_ext<q>")]
)
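;; A worked example of the lane-to-byte conversion above (illustrative
;; only): for V4SImode the unit size is 4 bytes, so a lane index of 1
;; becomes the byte immediate #4 in the emitted EXT.
;;
;;   #include <arm_neon.h>
;;   int32x4_t
;;   rotate_by_one (int32x4_t a, int32x4_t b)
;;   {
;;     /* Lane index 1 -> ext v0.16b, v0.16b, v1.16b, #4  */
;;     return vextq_s32 (a, b, 1);
;;   }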
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
REVERSE))]
"TARGET_SIMD"
"rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_rev<q>")]
)
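;; Illustrative mapping, not part of the original file: REVERSE covers the
;; rev16/rev32/rev64 element reversals.  For example, vrev64q_s16 reverses
;; the 16-bit elements within each 64-bit chunk of its operand.
;;
;;   #include <arm_neon.h>
;;   int16x8_t
;;   rev_in_dwords (int16x8_t a)
;;   {
;;     return vrev64q_s16 (a);   /* rev64 v0.8h, v0.8h  */
;;   }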
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
(define_insn "aarch64_st2<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_2DNX 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VSTRUCT_2DNX [
	  (match_operand:VSTRUCT_2DNX 1 "register_operand" "w")]
	  UNSPEC_ST2))]
"TARGET_SIMD"
"st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
[(set_attr "type" "neon_store2_2reg")]
)
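
;; The pattern above implements the interleaving ST2 store for pairs of
;; D registers whose elements are narrower than 64 bits (the VSTRUCT_2DNX
;; modes); %S1 and %T1 print the two consecutive registers of the tuple.
;; As a rough illustration (not part of this file; the function name and
;; register numbers are examples only), a C call such as vst2_s8 is
;; expected to reach this pattern:
;;
;;   #include <arm_neon.h>
;;   void store_pair (int8_t *p, int8x8x2_t v)
;;   {
;;     vst2_s8 (p, v);   /* e.g. st2 {v0.8b - v1.8b}, [x0] */
;;   }
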
(define_insn "aarch64_st2<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_2DX 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VSTRUCT_2DX [
	  (match_operand:VSTRUCT_2DX 1 "register_operand" "w")]
	  UNSPEC_ST2))]
"TARGET_SIMD"
"st1\\t{%S1.1d - %T1.1d}, %0"
[(set_attr "type" "neon_store1_2reg")]
)
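
;; The VSTRUCT_2DX variant above covers the tuples of single 64-bit
;; elements (the int64x1x2_t family).  ST2 has no 1D arrangement, and
;; interleaving two one-element vectors is simply a contiguous store, so
;; the pattern emits ST1 on the register pair instead.  A rough C-level
;; sketch (function name and register numbers are examples only):
;;
;;   #include <arm_neon.h>
;;   void store_pair_d (int64_t *p, int64x1x2_t v)
;;   {
;;     vst2_s64 (p, v);   /* e.g. st1 {v0.1d - v1.1d}, [x0] */
;;   }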
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
(vqtbx2_u8): Likewise. (vqtbx2_p8): Likewise. (vqtbx2q_s8): Likewise. (vqtbx2q_u8): Likewise. (vqtbx2q_p8): Likewise. (vqtbx3_s8): Likewise. (vqtbx3_u8): Likewise. (vqtbx3_p8): Likewise. (vqtbx3q_s8): Likewise. (vqtbx3q_u8): Likewise. (vqtbx3q_p8): Likewise. (vqtbx4_s8): Likewise. (vqtbx4_u8): Likewise. (vqtbx4_p8): Likewise. (vqtbx4q_s8): Likewise. (vqtbx4q_u8): Likewise. (vqtbx4q_p8): Likewise. (vst1_s64_x2): Likewise. (vst1_u64_x2): Likewise. (vst1_f64_x2): Likewise. (vst1_s8_x2): Likewise. (vst1_p8_x2): Likewise. (vst1_s16_x2): Likewise. (vst1_p16_x2): Likewise. (vst1_s32_x2): Likewise. (vst1_u8_x2): Likewise. (vst1_u16_x2): Likewise. (vst1_u32_x2): Likewise. (vst1_f16_x2): Likewise. (vst1_f32_x2): Likewise. (vst1_p64_x2): Likewise. (vst1q_s8_x2): Likewise. (vst1q_p8_x2): Likewise. (vst1q_s16_x2): Likewise. (vst1q_p16_x2): Likewise. (vst1q_s32_x2): Likewise. (vst1q_s64_x2): Likewise. (vst1q_u8_x2): Likewise. (vst1q_u16_x2): Likewise. (vst1q_u32_x2): Likewise. (vst1q_u64_x2): Likewise. (vst1q_f16_x2): Likewise. (vst1q_f32_x2): Likewise. (vst1q_f64_x2): Likewise. (vst1q_p64_x2): Likewise. (vst1_s64_x3): Likewise. (vst1_u64_x3): Likewise. (vst1_f64_x3): Likewise. (vst1_s8_x3): Likewise. (vst1_p8_x3): Likewise. (vst1_s16_x3): Likewise. (vst1_p16_x3): Likewise. (vst1_s32_x3): Likewise. (vst1_u8_x3): Likewise. (vst1_u16_x3): Likewise. (vst1_u32_x3): Likewise. (vst1_f16_x3): Likewise. (vst1_f32_x3): Likewise. (vst1_p64_x3): Likewise. (vst1q_s8_x3): Likewise. (vst1q_p8_x3): Likewise. (vst1q_s16_x3): Likewise. (vst1q_p16_x3): Likewise. (vst1q_s32_x3): Likewise. (vst1q_s64_x3): Likewise. (vst1q_u8_x3): Likewise. (vst1q_u16_x3): Likewise. (vst1q_u32_x3): Likewise. (vst1q_u64_x3): Likewise. (vst1q_f16_x3): Likewise. (vst1q_f32_x3): Likewise. (vst1q_f64_x3): Likewise. (vst1q_p64_x3): Likewise. (vst1_s8_x4): Likewise. (vst1q_s8_x4): Likewise. (vst1_s16_x4): Likewise. (vst1q_s16_x4): Likewise. (vst1_s32_x4): Likewise. (vst1q_s32_x4): Likewise. (vst1_u8_x4): Likewise. (vst1q_u8_x4): Likewise. (vst1_u16_x4): Likewise. (vst1q_u16_x4): Likewise. (vst1_u32_x4): Likewise. (vst1q_u32_x4): Likewise. (vst1_f16_x4): Likewise. (vst1q_f16_x4): Likewise. (vst1_f32_x4): Likewise. (vst1q_f32_x4): Likewise. (vst1_p8_x4): Likewise. (vst1q_p8_x4): Likewise. (vst1_p16_x4): Likewise. (vst1q_p16_x4): Likewise. (vst1_s64_x4): Likewise. (vst1_u64_x4): Likewise. (vst1_p64_x4): Likewise. (vst1q_s64_x4): Likewise. (vst1q_u64_x4): Likewise. (vst1q_p64_x4): Likewise. (vst1_f64_x4): Likewise. (vst1q_f64_x4): Likewise. (vst2_s64): Likewise. (vst2_u64): Likewise. (vst2_f64): Likewise. (vst2_s8): Likewise. (vst2_p8): Likewise. (vst2_s16): Likewise. (vst2_p16): Likewise. (vst2_s32): Likewise. (vst2_u8): Likewise. (vst2_u16): Likewise. (vst2_u32): Likewise. (vst2_f16): Likewise. (vst2_f32): Likewise. (vst2_p64): Likewise. (vst2q_s8): Likewise. (vst2q_p8): Likewise. (vst2q_s16): Likewise. (vst2q_p16): Likewise. (vst2q_s32): Likewise. (vst2q_s64): Likewise. (vst2q_u8): Likewise. (vst2q_u16): Likewise. (vst2q_u32): Likewise. (vst2q_u64): Likewise. (vst2q_f16): Likewise. (vst2q_f32): Likewise. (vst2q_f64): Likewise. (vst2q_p64): Likewise. (vst3_s64): Likewise. (vst3_u64): Likewise. (vst3_f64): Likewise. (vst3_s8): Likewise. (vst3_p8): Likewise. (vst3_s16): Likewise. (vst3_p16): Likewise. (vst3_s32): Likewise. (vst3_u8): Likewise. (vst3_u16): Likewise. (vst3_u32): Likewise. (vst3_f16): Likewise. (vst3_f32): Likewise. (vst3_p64): Likewise. (vst3q_s8): Likewise. (vst3q_p8): Likewise. (vst3q_s16): Likewise. (vst3q_p16): Likewise. 
(vst3q_s32): Likewise. (vst3q_s64): Likewise. (vst3q_u8): Likewise. (vst3q_u16): Likewise. (vst3q_u32): Likewise. (vst3q_u64): Likewise. (vst3q_f16): Likewise. (vst3q_f32): Likewise. (vst3q_f64): Likewise. (vst3q_p64): Likewise. (vst4_s64): Likewise. (vst4_u64): Likewise. (vst4_f64): Likewise. (vst4_s8): Likewise. (vst4_p8): Likewise. (vst4_s16): Likewise. (vst4_p16): Likewise. (vst4_s32): Likewise. (vst4_u8): Likewise. (vst4_u16): Likewise. (vst4_u32): Likewise. (vst4_f16): Likewise. (vst4_f32): Likewise. (vst4_p64): Likewise. (vst4q_s8): Likewise. (vst4q_p8): Likewise. (vst4q_s16): Likewise. (vst4q_p16): Likewise. (vst4q_s32): Likewise. (vst4q_s64): Likewise. (vst4q_u8): Likewise. (vst4q_u16): Likewise. (vst4q_u32): Likewise. (vst4q_u64): Likewise. (vst4q_f16): Likewise. (vst4q_f32): Likewise. (vst4q_f64): Likewise. (vst4q_p64): Likewise. (vtbx4_s8): Likewise. (vtbx4_u8): Likewise. (vtbx4_p8): Likewise. (vld1_bf16_x2): Likewise. (vld1q_bf16_x2): Likewise. (vld1_bf16_x3): Likewise. (vld1q_bf16_x3): Likewise. (vld1_bf16_x4): Likewise. (vld1q_bf16_x4): Likewise. (vld2_bf16): Likewise. (vld2q_bf16): Likewise. (vld2_dup_bf16): Likewise. (vld2q_dup_bf16): Likewise. (vld3_bf16): Likewise. (vld3q_bf16): Likewise. (vld3_dup_bf16): Likewise. (vld3q_dup_bf16): Likewise. (vld4_bf16): Likewise. (vld4q_bf16): Likewise. (vld4_dup_bf16): Likewise. (vld4q_dup_bf16): Likewise. (vst1_bf16_x2): Likewise. (vst1q_bf16_x2): Likewise. (vst1_bf16_x3): Likewise. (vst1q_bf16_x3): Likewise. (vst1_bf16_x4): Likewise. (vst1q_bf16_x4): Likewise. (vst2_bf16): Likewise. (vst2q_bf16): Likewise. (vst3_bf16): Likewise. (vst3q_bf16): Likewise. (vst4_bf16): Likewise. (vst4q_bf16): Likewise. (vld2_lane_bf16): Likewise. (vld2q_lane_bf16): Likewise. (vld3_lane_bf16): Likewise. (vld3q_lane_bf16): Likewise. (vld4_lane_bf16): Likewise. (vld4q_lane_bf16): Likewise. (vst2_lane_bf16): Likewise. (vst2q_lane_bf16): Likewise. (vst3_lane_bf16): Likewise. (vst3q_lane_bf16): Likewise. (vst4_lane_bf16): Likewise. (vst4q_lane_bf16): Likewise. * config/aarch64/geniterators.sh: Modify iterator regex to match new vector-tuple modes. * config/aarch64/iterators.md (insn_count): Extend mode attribute with vector-tuple type information. (nregs): Likewise. (Vendreg): Likewise. (Vetype): Likewise. (Vtype): Likewise. (VSTRUCT_2D): New mode iterator. (VSTRUCT_2DNX): Likewise. (VSTRUCT_2DX): Likewise. (VSTRUCT_2Q): Likewise. (VSTRUCT_2QD): Likewise. (VSTRUCT_3D): Likewise. (VSTRUCT_3DNX): Likewise. (VSTRUCT_3DX): Likewise. (VSTRUCT_3Q): Likewise. (VSTRUCT_3QD): Likewise. (VSTRUCT_4D): Likewise. (VSTRUCT_4DNX): Likewise. (VSTRUCT_4DX): Likewise. (VSTRUCT_4Q): Likewise. (VSTRUCT_4QD): Likewise. (VSTRUCT_D): Likewise. (VSTRUCT_Q): Likewise. (VSTRUCT_QD): Likewise. (VSTRUCT_ELT): New mode attribute. (vstruct_elt): Likewise. * genmodes.c (VECTOR_MODE): Add default prefix and order parameters. (VECTOR_MODE_WITH_PREFIX): Define. (make_vector_mode): Add mode prefix and order parameters. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/bf16_vldN_lane_2.c: Relax incorrect register number requirement. * gcc.target/aarch64/sve/pcs/struct_3_256.c: Accept equivalent codegen with fmov.
2021-08-09 16:26:48 +02:00
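
;; Store three D-register vectors to memory with ST3 interleaving.  The
;; VSTRUCT_3DNX iterator covers the multi-element 64-bit tuple modes
;; (e.g. V3x8QI); %S1 and %U1 print the first and third registers of the
;; three-register tuple, so the template expands to something like
;; "st3 {v0.8b - v2.8b}, [x0]" (an illustrative instance, not required
;; output).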
(define_insn "aarch64_st3<vstruct_elt>_dreg"
[(set (match_operand:VSTRUCT_3DNX 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:VSTRUCT_3DNX [
(match_operand:VSTRUCT_3DNX 1 "register_operand" "w")]
UNSPEC_ST3))]
"TARGET_SIMD"
"st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
[(set_attr "type" "neon_store3_3reg")]
)
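
;; As above, but for the single-element 64-bit tuple modes V3x1DI and
;; V3x1DF.  ST3 has no .1d arrangement; since interleaving one-element
;; vectors is a no-op, an equivalent ST1 of three 1d registers is
;; emitted instead.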
(define_insn "aarch64_st3<vstruct_elt>_dreg"
[(set (match_operand:VSTRUCT_3DX 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:VSTRUCT_3DX [
(match_operand:VSTRUCT_3DX 1 "register_operand" "w")]
UNSPEC_ST3))]
"TARGET_SIMD"
"st1\\t{%S1.1d - %U1.1d}, %0"
[(set_attr "type" "neon_store1_3reg")]
)
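
;; Both _dreg patterns appear to be reached from the
;; aarch64_st3<vstruct_elt> expander for the 64-bit modes, which in turn
;; backs the 64-bit vst3 intrinsics (vst3_s8 through vst3_f64) in
;; arm_neon.h; the Q-register forms are handled by separate patterns.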
AArch64 [3/10] 2012-10-23 Ian Bolton <ian.bolton@arm.com> James Greenhalgh <james.greenhalgh@arm.com> Jim MacArthur <jim.macarthur@arm.com> Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Marcus Shawcroft <marcus.shawcroft@arm.com> Nigel Stephens <nigel.stephens@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Richard Earnshaw <rearnsha@arm.com> Sofiane Naci <sofiane.naci@arm.com> Stephen Thomas <stephen.thomas@arm.com> Tejas Belagod <tejas.belagod@arm.com> Yufeng Zhang <yufeng.zhang@arm.com> * common/config/aarch64/aarch64-common.c: New file. * config/aarch64/aarch64-arches.def: New file. * config/aarch64/aarch64-builtins.c: New file. * config/aarch64/aarch64-cores.def: New file. * config/aarch64/aarch64-elf-raw.h: New file. * config/aarch64/aarch64-elf.h: New file. * config/aarch64/aarch64-generic.md: New file. * config/aarch64/aarch64-linux.h: New file. * config/aarch64/aarch64-modes.def: New file. * config/aarch64/aarch64-option-extensions.def: New file. * config/aarch64/aarch64-opts.h: New file. * config/aarch64/aarch64-protos.h: New file. * config/aarch64/aarch64-simd.md: New file. * config/aarch64/aarch64-tune.md: New file. * config/aarch64/aarch64.c: New file. * config/aarch64/aarch64.h: New file. * config/aarch64/aarch64.md: New file. * config/aarch64/aarch64.opt: New file. * config/aarch64/arm_neon.h: New file. * config/aarch64/constraints.md: New file. * config/aarch64/gentune.sh: New file. * config/aarch64/iterators.md: New file. * config/aarch64/large.md: New file. * config/aarch64/predicates.md: New file. * config/aarch64/small.md: New file. * config/aarch64/sync.md: New file. * config/aarch64/t-aarch64-linux: New file. * config/aarch64/t-aarch64: New file. Co-Authored-By: Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com> Co-Authored-By: Jim MacArthur <jim.macarthur@arm.com> Co-Authored-By: Marcus Shawcroft <marcus.shawcroft@arm.com> Co-Authored-By: Nigel Stephens <nigel.stephens@arm.com> Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Co-Authored-By: Richard Earnshaw <rearnsha@arm.com> Co-Authored-By: Sofiane Naci <sofiane.naci@arm.com> Co-Authored-By: Stephen Thomas <stephen.thomas@arm.com> Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com> Co-Authored-By: Yufeng Zhang <yufeng.zhang@arm.com> From-SVN: r192723
2012-10-23 19:02:30 +02:00
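;; Store a tuple of four 64-bit vectors with a single interleaving ST4.
;; Judging by the iterator naming and the separate .1d pattern below,
;; VSTRUCT_4DNX is assumed to cover the four-register D-form tuple modes
;; with more than one element per vector.  For example, vst4_s16 on an
;; int16x4x4_t is expected to reach this pattern and emit something like:
;;   st4 {v0.4h - v3.4h}, [x0]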
(define_insn "aarch64_st4<vstruct_elt>_dreg"
[(set (match_operand:VSTRUCT_4DNX 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:VSTRUCT_4DNX [
(match_operand:VSTRUCT_4DNX 1 "register_operand" "w")]
UNSPEC_ST4))]
"TARGET_SIMD"
"st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
[(set_attr "type" "neon_store4_4reg")]
)
AArch64 [3/10] 2012-10-23 Ian Bolton <ian.bolton@arm.com> James Greenhalgh <james.greenhalgh@arm.com> Jim MacArthur <jim.macarthur@arm.com> Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Marcus Shawcroft <marcus.shawcroft@arm.com> Nigel Stephens <nigel.stephens@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Richard Earnshaw <rearnsha@arm.com> Sofiane Naci <sofiane.naci@arm.com> Stephen Thomas <stephen.thomas@arm.com> Tejas Belagod <tejas.belagod@arm.com> Yufeng Zhang <yufeng.zhang@arm.com> * common/config/aarch64/aarch64-common.c: New file. * config/aarch64/aarch64-arches.def: New file. * config/aarch64/aarch64-builtins.c: New file. * config/aarch64/aarch64-cores.def: New file. * config/aarch64/aarch64-elf-raw.h: New file. * config/aarch64/aarch64-elf.h: New file. * config/aarch64/aarch64-generic.md: New file. * config/aarch64/aarch64-linux.h: New file. * config/aarch64/aarch64-modes.def: New file. * config/aarch64/aarch64-option-extensions.def: New file. * config/aarch64/aarch64-opts.h: New file. * config/aarch64/aarch64-protos.h: New file. * config/aarch64/aarch64-simd.md: New file. * config/aarch64/aarch64-tune.md: New file. * config/aarch64/aarch64.c: New file. * config/aarch64/aarch64.h: New file. * config/aarch64/aarch64.md: New file. * config/aarch64/aarch64.opt: New file. * config/aarch64/arm_neon.h: New file. * config/aarch64/constraints.md: New file. * config/aarch64/gentune.sh: New file. * config/aarch64/iterators.md: New file. * config/aarch64/large.md: New file. * config/aarch64/predicates.md: New file. * config/aarch64/small.md: New file. * config/aarch64/sync.md: New file. * config/aarch64/t-aarch64-linux: New file. * config/aarch64/t-aarch64: New file. Co-Authored-By: Chris Schlumberger-Socha <chris.schlumberger-socha@arm.com> Co-Authored-By: James Greenhalgh <james.greenhalgh@arm.com> Co-Authored-By: Jim MacArthur <jim.macarthur@arm.com> Co-Authored-By: Marcus Shawcroft <marcus.shawcroft@arm.com> Co-Authored-By: Nigel Stephens <nigel.stephens@arm.com> Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Co-Authored-By: Richard Earnshaw <rearnsha@arm.com> Co-Authored-By: Sofiane Naci <sofiane.naci@arm.com> Co-Authored-By: Stephen Thomas <stephen.thomas@arm.com> Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com> Co-Authored-By: Yufeng Zhang <yufeng.zhang@arm.com> From-SVN: r192723
2012-10-23 19:02:30 +02:00
aarch64: Add machine modes for Neon vector-tuple types Until now, GCC has used large integer machine modes (OI, CI and XI) to model Neon vector-tuple types. This is suboptimal for many reasons, the most notable are: 1) Large integer modes are opaque and modifying one vector in the tuple requires a lot of inefficient set/get gymnastics. The result is a lot of superfluous move instructions. 2) Large integer modes do not map well to types that are tuples of 64-bit vectors - we need additional zero-padding which again results in superfluous move instructions. This patch adds new machine modes that better model the C-level Neon vector-tuple types. The approach is somewhat similar to that already used for SVE vector-tuple types. All of the AArch64 backend patterns and builtins that manipulate Neon vector tuples are updated to use the new machine modes. This has the effect of significantly reducing the amount of boiler-plate code in the arm_neon.h header. While this patch increases the quality of code generated in many instances, there is still room for significant improvement - which will be attempted in subsequent patches. gcc/ChangeLog: 2021-08-09 Jonathan Wright <jonathan.wright@arm.com> Richard Sandiford <richard.sandiford@arm.com> * config/aarch64/aarch64-builtins.c (v2x8qi_UP): Define. (v2x4hi_UP): Likewise. (v2x4hf_UP): Likewise. (v2x4bf_UP): Likewise. (v2x2si_UP): Likewise. (v2x2sf_UP): Likewise. (v2x1di_UP): Likewise. (v2x1df_UP): Likewise. (v2x16qi_UP): Likewise. (v2x8hi_UP): Likewise. (v2x8hf_UP): Likewise. (v2x8bf_UP): Likewise. (v2x4si_UP): Likewise. (v2x4sf_UP): Likewise. (v2x2di_UP): Likewise. (v2x2df_UP): Likewise. (v3x8qi_UP): Likewise. (v3x4hi_UP): Likewise. (v3x4hf_UP): Likewise. (v3x4bf_UP): Likewise. (v3x2si_UP): Likewise. (v3x2sf_UP): Likewise. (v3x1di_UP): Likewise. (v3x1df_UP): Likewise. (v3x16qi_UP): Likewise. (v3x8hi_UP): Likewise. (v3x8hf_UP): Likewise. (v3x8bf_UP): Likewise. (v3x4si_UP): Likewise. (v3x4sf_UP): Likewise. (v3x2di_UP): Likewise. (v3x2df_UP): Likewise. (v4x8qi_UP): Likewise. (v4x4hi_UP): Likewise. (v4x4hf_UP): Likewise. (v4x4bf_UP): Likewise. (v4x2si_UP): Likewise. (v4x2sf_UP): Likewise. (v4x1di_UP): Likewise. (v4x1df_UP): Likewise. (v4x16qi_UP): Likewise. (v4x8hi_UP): Likewise. (v4x8hf_UP): Likewise. (v4x8bf_UP): Likewise. (v4x4si_UP): Likewise. (v4x4sf_UP): Likewise. (v4x2di_UP): Likewise. (v4x2df_UP): Likewise. (TYPES_GETREGP): Delete. (TYPES_SETREGP): Likewise. (TYPES_LOADSTRUCT_U): Define. (TYPES_LOADSTRUCT_P): Likewise. (TYPES_LOADSTRUCT_LANE_U): Likewise. (TYPES_LOADSTRUCT_LANE_P): Likewise. (TYPES_STORE1P): Move for consistency. (TYPES_STORESTRUCT_U): Define. (TYPES_STORESTRUCT_P): Likewise. (TYPES_STORESTRUCT_LANE_U): Likewise. (TYPES_STORESTRUCT_LANE_P): Likewise. (aarch64_simd_tuple_types): Define. (aarch64_lookup_simd_builtin_type): Handle tuple type lookup. (aarch64_init_simd_builtin_functions): Update frontend lookup for builtin functions after handling arm_neon.h pragma. (register_tuple_type): Manually set modes of single-integer tuple types. Record tuple types. * config/aarch64/aarch64-modes.def (ADV_SIMD_D_REG_STRUCT_MODES): Define D-register tuple modes. (ADV_SIMD_Q_REG_STRUCT_MODES): Define Q-register tuple modes. (SVE_MODES): Give single-vector modes priority over vector- tuple modes. (VECTOR_MODES_WITH_PREFIX): Set partial-vector mode order to be after all single-vector modes. * config/aarch64/aarch64-simd-builtins.def: Update builtin generator macros to reflect modifications to the backend patterns. 
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2<vstruct_elt>): This. (aarch64_simd_ld2r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2r<vstruct_elt>): This. (aarch64_vec_load_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_load_lanes<mode>_lane<vstruct_elt>): This. (vec_load_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanes<mode><vstruct_elt>): This. (aarch64_simd_st2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st2<vstruct_elt>): This. (aarch64_vec_store_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_store_lanes<mode>_lane<vstruct_elt>): This. (vec_store_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanes<mode><vstruct_elt>): This. (aarch64_simd_ld3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3<vstruct_elt>): This. (aarch64_simd_ld3r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3r<vstruct_elt>): This. (aarch64_vec_load_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesci<mode>): This. (aarch64_simd_st3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st3<vstruct_elt>): This. (aarch64_vec_store_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesci<mode>): This. (aarch64_simd_ld4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4<vstruct_elt>): This. (aarch64_simd_ld4r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4r<vstruct_elt>): This. (aarch64_vec_load_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesxi<mode>): This. (aarch64_simd_st4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st4<vstruct_elt>): This. (aarch64_vec_store_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesxi<mode>): This. (mov<mode>): Define for Neon vector-tuple modes. (aarch64_ld1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x3<vstruct_elt>): This. (aarch64_ld1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x3_<vstruct_elt>): This. (aarch64_ld1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x4<vstruct_elt>): This. (aarch64_ld1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x4_<vstruct_elt>): This. (aarch64_st1x2<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x2<vstruct_elt>): This. (aarch64_st1_x2_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x2_<vstruct_elt>): This. (aarch64_st1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x3<vstruct_elt>): This. (aarch64_st1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x3_<vstruct_elt>): This. (aarch64_st1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x4<vstruct_elt>): This. (aarch64_st1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x4_<vstruct_elt>): This. (*aarch64_mov<mode>): Define for vector-tuple modes. (*aarch64_be_mov<mode>): Likewise. (aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs>r<vstruct_elt>): This. (aarch64_ld2<mode>_dreg): Use vector-tuple mode iterator and rename to... 
(aarch64_ld2<vstruct_elt>_dreg): This. (aarch64_ld3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld3<vstruct_elt>_dreg): This. (aarch64_ld4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld4<vstruct_elt>_dreg): This. (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs><vstruct_elt>): Use vector-tuple mode iterator and rename to... (aarch64_ld<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode (aarch64_ld1x2<VQ:mode>): Delete. (aarch64_ld1x2<VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x2<vstruct_elt>): This. (aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_ld<nregs>_lane<vstruct_elt>): This. (aarch64_get_dreg<VSTRUCT:mode><VDC:mode>): Delete. (aarch64_get_qreg<VSTRUCT:mode><VQ:mode>): Likewise. (aarch64_st2<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st2<vstruct_elt>_dreg): This. (aarch64_st3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st3<vstruct_elt>_dreg): This. (aarch64_st4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st4<vstruct_elt>_dreg): This. (aarch64_st<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st<nregs><vstruct_elt>): This. (aarch64_st<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode iterator and rename to aarch64_st<nregs><vstruct_elt>. (aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_st<nregs>_lane<vstruct_elt>): This. (aarch64_set_qreg<VSTRUCT:mode><VQ:mode>): Delete. (aarch64_simd_ld1<mode>_x2): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld1<vstruct_elt>_x2): This. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Refactor to include new vector-tuple modes. (aarch64_classify_vector_mode): Add cases for new vector- tuple modes. (aarch64_advsimd_partial_struct_mode_p): Define. (aarch64_advsimd_full_struct_mode_p): Likewise. (aarch64_advsimd_vector_array_mode): Likewise. (aarch64_sve_data_mode): Change location in file. (aarch64_array_mode): Handle case of Neon vector-tuple modes. (aarch64_hard_regno_nregs): Handle case of partial Neon vector structures. (aarch64_classify_address): Refactor to include handling of Neon vector-tuple modes. (aarch64_print_operand): Print "d" for "%R" for a partial Neon vector structure. (aarch64_expand_vec_perm_1): Use new vector-tuple mode. (aarch64_modes_tieable_p): Prevent tieing Neon partial struct modes with scalar machines modes larger than 8 bytes. (aarch64_can_change_mode_class): Don't allow changes between partial and full Neon vector-structure modes. * config/aarch64/arm_neon.h (vst2_lane_f16): Use updated builtin and remove boiler-plate code for opaque mode. (vst2_lane_f32): Likewise. (vst2_lane_f64): Likewise. (vst2_lane_p8): Likewise. (vst2_lane_p16): Likewise. (vst2_lane_p64): Likewise. (vst2_lane_s8): Likewise. (vst2_lane_s16): Likewise. (vst2_lane_s32): Likewise. (vst2_lane_s64): Likewise. (vst2_lane_u8): Likewise. (vst2_lane_u16): Likewise. (vst2_lane_u32): Likewise. (vst2_lane_u64): Likewise. (vst2q_lane_f16): Likewise. (vst2q_lane_f32): Likewise. (vst2q_lane_f64): Likewise. (vst2q_lane_p8): Likewise. (vst2q_lane_p16): Likewise. (vst2q_lane_p64): Likewise. (vst2q_lane_s8): Likewise. (vst2q_lane_s16): Likewise. (vst2q_lane_s32): Likewise. (vst2q_lane_s64): Likewise. (vst2q_lane_u8): Likewise. (vst2q_lane_u16): Likewise. (vst2q_lane_u32): Likewise. 
(vst2q_lane_u64): Likewise. (vst3_lane_f16): Likewise. (vst3_lane_f32): Likewise. (vst3_lane_f64): Likewise. (vst3_lane_p8): Likewise. (vst3_lane_p16): Likewise. (vst3_lane_p64): Likewise. (vst3_lane_s8): Likewise. (vst3_lane_s16): Likewise. (vst3_lane_s32): Likewise. (vst3_lane_s64): Likewise. (vst3_lane_u8): Likewise. (vst3_lane_u16): Likewise. (vst3_lane_u32): Likewise. (vst3_lane_u64): Likewise. (vst3q_lane_f16): Likewise. (vst3q_lane_f32): Likewise. (vst3q_lane_f64): Likewise. (vst3q_lane_p8): Likewise. (vst3q_lane_p16): Likewise. (vst3q_lane_p64): Likewise. (vst3q_lane_s8): Likewise. (vst3q_lane_s16): Likewise. (vst3q_lane_s32): Likewise. (vst3q_lane_s64): Likewise. (vst3q_lane_u8): Likewise. (vst3q_lane_u16): Likewise. (vst3q_lane_u32): Likewise. (vst3q_lane_u64): Likewise. (vst4_lane_f16): Likewise. (vst4_lane_f32): Likewise. (vst4_lane_f64): Likewise. (vst4_lane_p8): Likewise. (vst4_lane_p16): Likewise. (vst4_lane_p64): Likewise. (vst4_lane_s8): Likewise. (vst4_lane_s16): Likewise. (vst4_lane_s32): Likewise. (vst4_lane_s64): Likewise. (vst4_lane_u8): Likewise. (vst4_lane_u16): Likewise. (vst4_lane_u32): Likewise. (vst4_lane_u64): Likewise. (vst4q_lane_f16): Likewise. (vst4q_lane_f32): Likewise. (vst4q_lane_f64): Likewise. (vst4q_lane_p8): Likewise. (vst4q_lane_p16): Likewise. (vst4q_lane_p64): Likewise. (vst4q_lane_s8): Likewise. (vst4q_lane_s16): Likewise. (vst4q_lane_s32): Likewise. (vst4q_lane_s64): Likewise. (vst4q_lane_u8): Likewise. (vst4q_lane_u16): Likewise. (vst4q_lane_u32): Likewise. (vst4q_lane_u64): Likewise. (vtbl3_s8): Likewise. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vld1_u8_x3): Likewise. (vld1_s8_x3): Likewise. (vld1_u16_x3): Likewise. (vld1_s16_x3): Likewise. (vld1_u32_x3): Likewise. (vld1_s32_x3): Likewise. (vld1_u64_x3): Likewise. (vld1_s64_x3): Likewise. (vld1_f16_x3): Likewise. (vld1_f32_x3): Likewise. (vld1_f64_x3): Likewise. (vld1_p8_x3): Likewise. (vld1_p16_x3): Likewise. (vld1_p64_x3): Likewise. (vld1q_u8_x3): Likewise. (vld1q_s8_x3): Likewise. (vld1q_u16_x3): Likewise. (vld1q_s16_x3): Likewise. (vld1q_u32_x3): Likewise. (vld1q_s32_x3): Likewise. (vld1q_u64_x3): Likewise. (vld1q_s64_x3): Likewise. (vld1q_f16_x3): Likewise. (vld1q_f32_x3): Likewise. (vld1q_f64_x3): Likewise. (vld1q_p8_x3): Likewise. (vld1q_p16_x3): Likewise. (vld1q_p64_x3): Likewise. (vld1_u8_x2): Likewise. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. (vld1_s8_x4): Likewise. (vld1q_s8_x4): Likewise. (vld1_s16_x4): Likewise. (vld1q_s16_x4): Likewise. (vld1_s32_x4): Likewise. (vld1q_s32_x4): Likewise. (vld1_u8_x4): Likewise. (vld1q_u8_x4): Likewise. (vld1_u16_x4): Likewise. (vld1q_u16_x4): Likewise. (vld1_u32_x4): Likewise. (vld1q_u32_x4): Likewise. (vld1_f16_x4): Likewise. (vld1q_f16_x4): Likewise. (vld1_f32_x4): Likewise. (vld1q_f32_x4): Likewise. (vld1_p8_x4): Likewise. 
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
(vld3q_dup_f32): Likewise. (vld3q_dup_f64): Likewise. (vld3q_dup_p64): Likewise. (vld4_dup_s64): Likewise. (vld4_dup_u64): Likewise. (vld4_dup_f64): Likewise. (vld4_dup_s8): Likewise. (vld4_dup_p8): Likewise. (vld4_dup_s16): Likewise. (vld4_dup_p16): Likewise. (vld4_dup_s32): Likewise. (vld4_dup_u8): Likewise. (vld4_dup_u16): Likewise. (vld4_dup_u32): Likewise. (vld4_dup_f16): Likewise. (vld4_dup_f32): Likewise. (vld4_dup_p64): Likewise. (vld4q_dup_s8): Likewise. (vld4q_dup_p8): Likewise. (vld4q_dup_s16): Likewise. (vld4q_dup_p16): Likewise. (vld4q_dup_s32): Likewise. (vld4q_dup_s64): Likewise. (vld4q_dup_u8): Likewise. (vld4q_dup_u16): Likewise. (vld4q_dup_u32): Likewise. (vld4q_dup_u64): Likewise. (vld4q_dup_f16): Likewise. (vld4q_dup_f32): Likewise. (vld4q_dup_f64): Likewise. (vld4q_dup_p64): Likewise. (vld2_lane_u8): Likewise. (vld2_lane_u16): Likewise. (vld2_lane_u32): Likewise. (vld2_lane_u64): Likewise. (vld2_lane_s8): Likewise. (vld2_lane_s16): Likewise. (vld2_lane_s32): Likewise. (vld2_lane_s64): Likewise. (vld2_lane_f16): Likewise. (vld2_lane_f32): Likewise. (vld2_lane_f64): Likewise. (vld2_lane_p8): Likewise. (vld2_lane_p16): Likewise. (vld2_lane_p64): Likewise. (vld2q_lane_u8): Likewise. (vld2q_lane_u16): Likewise. (vld2q_lane_u32): Likewise. (vld2q_lane_u64): Likewise. (vld2q_lane_s8): Likewise. (vld2q_lane_s16): Likewise. (vld2q_lane_s32): Likewise. (vld2q_lane_s64): Likewise. (vld2q_lane_f16): Likewise. (vld2q_lane_f32): Likewise. (vld2q_lane_f64): Likewise. (vld2q_lane_p8): Likewise. (vld2q_lane_p16): Likewise. (vld2q_lane_p64): Likewise. (vld3_lane_u8): Likewise. (vld3_lane_u16): Likewise. (vld3_lane_u32): Likewise. (vld3_lane_u64): Likewise. (vld3_lane_s8): Likewise. (vld3_lane_s16): Likewise. (vld3_lane_s32): Likewise. (vld3_lane_s64): Likewise. (vld3_lane_f16): Likewise. (vld3_lane_f32): Likewise. (vld3_lane_f64): Likewise. (vld3_lane_p8): Likewise. (vld3_lane_p16): Likewise. (vld3_lane_p64): Likewise. (vld3q_lane_u8): Likewise. (vld3q_lane_u16): Likewise. (vld3q_lane_u32): Likewise. (vld3q_lane_u64): Likewise. (vld3q_lane_s8): Likewise. (vld3q_lane_s16): Likewise. (vld3q_lane_s32): Likewise. (vld3q_lane_s64): Likewise. (vld3q_lane_f16): Likewise. (vld3q_lane_f32): Likewise. (vld3q_lane_f64): Likewise. (vld3q_lane_p8): Likewise. (vld3q_lane_p16): Likewise. (vld3q_lane_p64): Likewise. (vld4_lane_u8): Likewise. (vld4_lane_u16): Likewise. (vld4_lane_u32): Likewise. (vld4_lane_u64): Likewise. (vld4_lane_s8): Likewise. (vld4_lane_s16): Likewise. (vld4_lane_s32): Likewise. (vld4_lane_s64): Likewise. (vld4_lane_f16): Likewise. (vld4_lane_f32): Likewise. (vld4_lane_f64): Likewise. (vld4_lane_p8): Likewise. (vld4_lane_p16): Likewise. (vld4_lane_p64): Likewise. (vld4q_lane_u8): Likewise. (vld4q_lane_u16): Likewise. (vld4q_lane_u32): Likewise. (vld4q_lane_u64): Likewise. (vld4q_lane_s8): Likewise. (vld4q_lane_s16): Likewise. (vld4q_lane_s32): Likewise. (vld4q_lane_s64): Likewise. (vld4q_lane_f16): Likewise. (vld4q_lane_f32): Likewise. (vld4q_lane_f64): Likewise. (vld4q_lane_p8): Likewise. (vld4q_lane_p16): Likewise. (vld4q_lane_p64): Likewise. (vqtbl2_s8): Likewise. (vqtbl2_u8): Likewise. (vqtbl2_p8): Likewise. (vqtbl2q_s8): Likewise. (vqtbl2q_u8): Likewise. (vqtbl2q_p8): Likewise. (vqtbl3_s8): Likewise. (vqtbl3_u8): Likewise. (vqtbl3_p8): Likewise. (vqtbl3q_s8): Likewise. (vqtbl3q_u8): Likewise. (vqtbl3q_p8): Likewise. (vqtbl4_s8): Likewise. (vqtbl4_u8): Likewise. (vqtbl4_p8): Likewise. (vqtbl4q_s8): Likewise. (vqtbl4q_u8): Likewise. (vqtbl4q_p8): Likewise. (vqtbx2_s8): Likewise. 
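;; Store a tuple of four single-element 64-bit vectors (the VSTRUCT_4DX
;; modes, i.e. tuples such as V4x1DI and V4x1DF).  ST4 has no .1d
;; multiple-structure arrangement, so the store is emitted as a
;; four-register ST1 instead.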
(define_insn "aarch64_st4<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_4DX 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VSTRUCT_4DX [
		(match_operand:VSTRUCT_4DX 1 "register_operand" "w")]
		UNSPEC_ST4))]
"TARGET_SIMD"
"st1\\t{%S1.1d - %V1.1d}, %0"
[(set_attr "type" "neon_store1_4reg")]
)
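
;; For example, a vst4_s64 store of an int64x1x4_t comes out here as
;; (illustrative register allocation):
;;	st1	{v0.1d - v3.1d}, [x0]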
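
;; Expander for the st2/st3/st4 intrinsic stores of D-register tuples
;; (the VSTRUCT_D modes).  Operand 0 is the scalar base address and
;; operand 1 is the tuple to store; the body is expected to wrap
;; operand 0 in a MEM of the tuple mode and emit the matching
;; aarch64_st<nregs><vstruct_elt>_dreg pattern above.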
(define_expand "aarch64_st<nregs><vstruct_elt>"
  [(match_operand:DI 0 "register_operand")
(match_operand:VSTRUCT_D 1 "register_operand")]
"TARGET_SIMD"
{
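  /* Operand 0 holds the base address: wrap it in a MEM of the
     vector-tuple mode and defer to the corresponding D-register
     store-struct instruction pattern.  */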
rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[1]));
DONE;
})
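;; A minimal usage sketch (an assumption about the exact lowering path,
;; not taken from this file): an arm_neon.h intrinsic such as vst2_s8
;; stores an int8x8x2_t tuple (two D registers) through a builtin that
;; maps onto a store-struct expander of this kind, e.g.:
;;
;;   void store_pair (int8_t *ptr, int8x8x2_t val)
;;   {
;;     vst2_s8 (ptr, val);   /* typically emits st2 {v0.8b - v1.8b}, [x0] */
;;   }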
2021-08-09 16:26:48 +02:00
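;; Expander for the Advanced SIMD stN store intrinsics on Q-register
;; vector-tuple modes.  Operand 0 is the base address of the store and
;; operand 1 is the vector tuple to be stored.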
(define_expand "aarch64_st<nregs><vstruct_elt>"
[(match_operand:DI 0 "register_operand")
(match_operand:VSTRUCT_Q 1 "register_operand")]
"TARGET_SIMD"
{
2021-08-09 16:26:48 +02:00
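  /* Form a MEM rtx of the vector-tuple mode at the address in operand 0
     and emit the matching store-struct instruction.  */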
rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[1]));
DONE;
})
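
;; Expander behind the lane-wise store-struct intrinsics (the vst2_lane,
;; vst3_lane and vst4_lane families and their q-form variants): store one
;; lane from each vector of a tuple to the address held in operand 0.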
(define_expand "aarch64_st<nregs>_lane<vstruct_elt>"
[(match_operand:DI 0 "register_operand")
2021-08-09 16:26:48 +02:00
   (match_operand:VSTRUCT_QD 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  /* Build a BLKmode reference to the destination; its size is one
     element from each of the <nregs> vectors in the tuple.  */
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
aarch64: Add machine modes for Neon vector-tuple types Until now, GCC has used large integer machine modes (OI, CI and XI) to model Neon vector-tuple types. This is suboptimal for many reasons, the most notable are: 1) Large integer modes are opaque and modifying one vector in the tuple requires a lot of inefficient set/get gymnastics. The result is a lot of superfluous move instructions. 2) Large integer modes do not map well to types that are tuples of 64-bit vectors - we need additional zero-padding which again results in superfluous move instructions. This patch adds new machine modes that better model the C-level Neon vector-tuple types. The approach is somewhat similar to that already used for SVE vector-tuple types. All of the AArch64 backend patterns and builtins that manipulate Neon vector tuples are updated to use the new machine modes. This has the effect of significantly reducing the amount of boiler-plate code in the arm_neon.h header. While this patch increases the quality of code generated in many instances, there is still room for significant improvement - which will be attempted in subsequent patches. gcc/ChangeLog: 2021-08-09 Jonathan Wright <jonathan.wright@arm.com> Richard Sandiford <richard.sandiford@arm.com> * config/aarch64/aarch64-builtins.c (v2x8qi_UP): Define. (v2x4hi_UP): Likewise. (v2x4hf_UP): Likewise. (v2x4bf_UP): Likewise. (v2x2si_UP): Likewise. (v2x2sf_UP): Likewise. (v2x1di_UP): Likewise. (v2x1df_UP): Likewise. (v2x16qi_UP): Likewise. (v2x8hi_UP): Likewise. (v2x8hf_UP): Likewise. (v2x8bf_UP): Likewise. (v2x4si_UP): Likewise. (v2x4sf_UP): Likewise. (v2x2di_UP): Likewise. (v2x2df_UP): Likewise. (v3x8qi_UP): Likewise. (v3x4hi_UP): Likewise. (v3x4hf_UP): Likewise. (v3x4bf_UP): Likewise. (v3x2si_UP): Likewise. (v3x2sf_UP): Likewise. (v3x1di_UP): Likewise. (v3x1df_UP): Likewise. (v3x16qi_UP): Likewise. (v3x8hi_UP): Likewise. (v3x8hf_UP): Likewise. (v3x8bf_UP): Likewise. (v3x4si_UP): Likewise. (v3x4sf_UP): Likewise. (v3x2di_UP): Likewise. (v3x2df_UP): Likewise. (v4x8qi_UP): Likewise. (v4x4hi_UP): Likewise. (v4x4hf_UP): Likewise. (v4x4bf_UP): Likewise. (v4x2si_UP): Likewise. (v4x2sf_UP): Likewise. (v4x1di_UP): Likewise. (v4x1df_UP): Likewise. (v4x16qi_UP): Likewise. (v4x8hi_UP): Likewise. (v4x8hf_UP): Likewise. (v4x8bf_UP): Likewise. (v4x4si_UP): Likewise. (v4x4sf_UP): Likewise. (v4x2di_UP): Likewise. (v4x2df_UP): Likewise. (TYPES_GETREGP): Delete. (TYPES_SETREGP): Likewise. (TYPES_LOADSTRUCT_U): Define. (TYPES_LOADSTRUCT_P): Likewise. (TYPES_LOADSTRUCT_LANE_U): Likewise. (TYPES_LOADSTRUCT_LANE_P): Likewise. (TYPES_STORE1P): Move for consistency. (TYPES_STORESTRUCT_U): Define. (TYPES_STORESTRUCT_P): Likewise. (TYPES_STORESTRUCT_LANE_U): Likewise. (TYPES_STORESTRUCT_LANE_P): Likewise. (aarch64_simd_tuple_types): Define. (aarch64_lookup_simd_builtin_type): Handle tuple type lookup. (aarch64_init_simd_builtin_functions): Update frontend lookup for builtin functions after handling arm_neon.h pragma. (register_tuple_type): Manually set modes of single-integer tuple types. Record tuple types. * config/aarch64/aarch64-modes.def (ADV_SIMD_D_REG_STRUCT_MODES): Define D-register tuple modes. (ADV_SIMD_Q_REG_STRUCT_MODES): Define Q-register tuple modes. (SVE_MODES): Give single-vector modes priority over vector- tuple modes. (VECTOR_MODES_WITH_PREFIX): Set partial-vector mode order to be after all single-vector modes. * config/aarch64/aarch64-simd-builtins.def: Update builtin generator macros to reflect modifications to the backend patterns. 
* config/aarch64/aarch64-simd.md (aarch64_simd_ld2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2<vstruct_elt>): This. (aarch64_simd_ld2r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld2r<vstruct_elt>): This. (aarch64_vec_load_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_load_lanes<mode>_lane<vstruct_elt>): This. (vec_load_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanes<mode><vstruct_elt>): This. (aarch64_simd_st2<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st2<vstruct_elt>): This. (aarch64_vec_store_lanesoi_lane<mode>): Use vector-tuple mode iterator and rename to... (aarch64_vec_store_lanes<mode>_lane<vstruct_elt>): This. (vec_store_lanesoi<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanes<mode><vstruct_elt>): This. (aarch64_simd_ld3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3<vstruct_elt>): This. (aarch64_simd_ld3r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld3r<vstruct_elt>): This. (aarch64_vec_load_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesci<mode>): This. (aarch64_simd_st3<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st3<vstruct_elt>): This. (aarch64_vec_store_lanesci_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesci<mode>): This. (aarch64_simd_ld4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4<vstruct_elt>): This. (aarch64_simd_ld4r<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld4r<vstruct_elt>): This. (aarch64_vec_load_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_load_lanesxi<mode>): This. (aarch64_simd_st4<mode>): Use vector-tuple mode iterator and rename to... (aarch64_simd_st4<vstruct_elt>): This. (aarch64_vec_store_lanesxi_lane<mode>): Use vector-tuple mode iterator and rename to... (vec_store_lanesxi<mode>): This. (mov<mode>): Define for Neon vector-tuple modes. (aarch64_ld1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x3<vstruct_elt>): This. (aarch64_ld1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x3_<vstruct_elt>): This. (aarch64_ld1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x4<vstruct_elt>): This. (aarch64_ld1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1_x4_<vstruct_elt>): This. (aarch64_st1x2<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x2<vstruct_elt>): This. (aarch64_st1_x2_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x2_<vstruct_elt>): This. (aarch64_st1x3<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x3<vstruct_elt>): This. (aarch64_st1_x3_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x3_<vstruct_elt>): This. (aarch64_st1x4<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1x4<vstruct_elt>): This. (aarch64_st1_x4_<mode>): Use vector-tuple mode iterator and rename to... (aarch64_st1_x4_<vstruct_elt>): This. (*aarch64_mov<mode>): Define for vector-tuple modes. (*aarch64_be_mov<mode>): Likewise. (aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs>r<vstruct_elt>): This. (aarch64_ld2<mode>_dreg): Use vector-tuple mode iterator and rename to... 
(aarch64_ld2<vstruct_elt>_dreg): This. (aarch64_ld3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld3<vstruct_elt>_dreg): This. (aarch64_ld4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_ld4<vstruct_elt>_dreg): This. (aarch64_ld<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld<nregs><vstruct_elt>): Use vector-tuple mode iterator and rename to... (aarch64_ld<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode (aarch64_ld1x2<VQ:mode>): Delete. (aarch64_ld1x2<VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_ld1x2<vstruct_elt>): This. (aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_ld<nregs>_lane<vstruct_elt>): This. (aarch64_get_dreg<VSTRUCT:mode><VDC:mode>): Delete. (aarch64_get_qreg<VSTRUCT:mode><VQ:mode>): Likewise. (aarch64_st2<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st2<vstruct_elt>_dreg): This. (aarch64_st3<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st3<vstruct_elt>_dreg): This. (aarch64_st4<mode>_dreg): Use vector-tuple mode iterator and rename to... (aarch64_st4<vstruct_elt>_dreg): This. (aarch64_st<VSTRUCT:nregs><VDC:mode>): Use vector-tuple mode iterator and rename to... (aarch64_st<nregs><vstruct_elt>): This. (aarch64_st<VSTRUCT:nregs><VQ:mode>): Use vector-tuple mode iterator and rename to aarch64_st<nregs><vstruct_elt>. (aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>): Use vector- tuple mode iterator and rename to... (aarch64_st<nregs>_lane<vstruct_elt>): This. (aarch64_set_qreg<VSTRUCT:mode><VQ:mode>): Delete. (aarch64_simd_ld1<mode>_x2): Use vector-tuple mode iterator and rename to... (aarch64_simd_ld1<vstruct_elt>_x2): This. * config/aarch64/aarch64.c (aarch64_advsimd_struct_mode_p): Refactor to include new vector-tuple modes. (aarch64_classify_vector_mode): Add cases for new vector- tuple modes. (aarch64_advsimd_partial_struct_mode_p): Define. (aarch64_advsimd_full_struct_mode_p): Likewise. (aarch64_advsimd_vector_array_mode): Likewise. (aarch64_sve_data_mode): Change location in file. (aarch64_array_mode): Handle case of Neon vector-tuple modes. (aarch64_hard_regno_nregs): Handle case of partial Neon vector structures. (aarch64_classify_address): Refactor to include handling of Neon vector-tuple modes. (aarch64_print_operand): Print "d" for "%R" for a partial Neon vector structure. (aarch64_expand_vec_perm_1): Use new vector-tuple mode. (aarch64_modes_tieable_p): Prevent tieing Neon partial struct modes with scalar machines modes larger than 8 bytes. (aarch64_can_change_mode_class): Don't allow changes between partial and full Neon vector-structure modes. * config/aarch64/arm_neon.h (vst2_lane_f16): Use updated builtin and remove boiler-plate code for opaque mode. (vst2_lane_f32): Likewise. (vst2_lane_f64): Likewise. (vst2_lane_p8): Likewise. (vst2_lane_p16): Likewise. (vst2_lane_p64): Likewise. (vst2_lane_s8): Likewise. (vst2_lane_s16): Likewise. (vst2_lane_s32): Likewise. (vst2_lane_s64): Likewise. (vst2_lane_u8): Likewise. (vst2_lane_u16): Likewise. (vst2_lane_u32): Likewise. (vst2_lane_u64): Likewise. (vst2q_lane_f16): Likewise. (vst2q_lane_f32): Likewise. (vst2q_lane_f64): Likewise. (vst2q_lane_p8): Likewise. (vst2q_lane_p16): Likewise. (vst2q_lane_p64): Likewise. (vst2q_lane_s8): Likewise. (vst2q_lane_s16): Likewise. (vst2q_lane_s32): Likewise. (vst2q_lane_s64): Likewise. (vst2q_lane_u8): Likewise. (vst2q_lane_u16): Likewise. (vst2q_lane_u32): Likewise. 
(vst2q_lane_u64): Likewise. (vst3_lane_f16): Likewise. (vst3_lane_f32): Likewise. (vst3_lane_f64): Likewise. (vst3_lane_p8): Likewise. (vst3_lane_p16): Likewise. (vst3_lane_p64): Likewise. (vst3_lane_s8): Likewise. (vst3_lane_s16): Likewise. (vst3_lane_s32): Likewise. (vst3_lane_s64): Likewise. (vst3_lane_u8): Likewise. (vst3_lane_u16): Likewise. (vst3_lane_u32): Likewise. (vst3_lane_u64): Likewise. (vst3q_lane_f16): Likewise. (vst3q_lane_f32): Likewise. (vst3q_lane_f64): Likewise. (vst3q_lane_p8): Likewise. (vst3q_lane_p16): Likewise. (vst3q_lane_p64): Likewise. (vst3q_lane_s8): Likewise. (vst3q_lane_s16): Likewise. (vst3q_lane_s32): Likewise. (vst3q_lane_s64): Likewise. (vst3q_lane_u8): Likewise. (vst3q_lane_u16): Likewise. (vst3q_lane_u32): Likewise. (vst3q_lane_u64): Likewise. (vst4_lane_f16): Likewise. (vst4_lane_f32): Likewise. (vst4_lane_f64): Likewise. (vst4_lane_p8): Likewise. (vst4_lane_p16): Likewise. (vst4_lane_p64): Likewise. (vst4_lane_s8): Likewise. (vst4_lane_s16): Likewise. (vst4_lane_s32): Likewise. (vst4_lane_s64): Likewise. (vst4_lane_u8): Likewise. (vst4_lane_u16): Likewise. (vst4_lane_u32): Likewise. (vst4_lane_u64): Likewise. (vst4q_lane_f16): Likewise. (vst4q_lane_f32): Likewise. (vst4q_lane_f64): Likewise. (vst4q_lane_p8): Likewise. (vst4q_lane_p16): Likewise. (vst4q_lane_p64): Likewise. (vst4q_lane_s8): Likewise. (vst4q_lane_s16): Likewise. (vst4q_lane_s32): Likewise. (vst4q_lane_s64): Likewise. (vst4q_lane_u8): Likewise. (vst4q_lane_u16): Likewise. (vst4q_lane_u32): Likewise. (vst4q_lane_u64): Likewise. (vtbl3_s8): Likewise. (vtbl3_u8): Likewise. (vtbl3_p8): Likewise. (vtbl4_s8): Likewise. (vtbl4_u8): Likewise. (vtbl4_p8): Likewise. (vld1_u8_x3): Likewise. (vld1_s8_x3): Likewise. (vld1_u16_x3): Likewise. (vld1_s16_x3): Likewise. (vld1_u32_x3): Likewise. (vld1_s32_x3): Likewise. (vld1_u64_x3): Likewise. (vld1_s64_x3): Likewise. (vld1_f16_x3): Likewise. (vld1_f32_x3): Likewise. (vld1_f64_x3): Likewise. (vld1_p8_x3): Likewise. (vld1_p16_x3): Likewise. (vld1_p64_x3): Likewise. (vld1q_u8_x3): Likewise. (vld1q_s8_x3): Likewise. (vld1q_u16_x3): Likewise. (vld1q_s16_x3): Likewise. (vld1q_u32_x3): Likewise. (vld1q_s32_x3): Likewise. (vld1q_u64_x3): Likewise. (vld1q_s64_x3): Likewise. (vld1q_f16_x3): Likewise. (vld1q_f32_x3): Likewise. (vld1q_f64_x3): Likewise. (vld1q_p8_x3): Likewise. (vld1q_p16_x3): Likewise. (vld1q_p64_x3): Likewise. (vld1_u8_x2): Likewise. (vld1_s8_x2): Likewise. (vld1_u16_x2): Likewise. (vld1_s16_x2): Likewise. (vld1_u32_x2): Likewise. (vld1_s32_x2): Likewise. (vld1_u64_x2): Likewise. (vld1_s64_x2): Likewise. (vld1_f16_x2): Likewise. (vld1_f32_x2): Likewise. (vld1_f64_x2): Likewise. (vld1_p8_x2): Likewise. (vld1_p16_x2): Likewise. (vld1_p64_x2): Likewise. (vld1q_u8_x2): Likewise. (vld1q_s8_x2): Likewise. (vld1q_u16_x2): Likewise. (vld1q_s16_x2): Likewise. (vld1q_u32_x2): Likewise. (vld1q_s32_x2): Likewise. (vld1q_u64_x2): Likewise. (vld1q_s64_x2): Likewise. (vld1q_f16_x2): Likewise. (vld1q_f32_x2): Likewise. (vld1q_f64_x2): Likewise. (vld1q_p8_x2): Likewise. (vld1q_p16_x2): Likewise. (vld1q_p64_x2): Likewise. (vld1_s8_x4): Likewise. (vld1q_s8_x4): Likewise. (vld1_s16_x4): Likewise. (vld1q_s16_x4): Likewise. (vld1_s32_x4): Likewise. (vld1q_s32_x4): Likewise. (vld1_u8_x4): Likewise. (vld1q_u8_x4): Likewise. (vld1_u16_x4): Likewise. (vld1q_u16_x4): Likewise. (vld1_u32_x4): Likewise. (vld1q_u32_x4): Likewise. (vld1_f16_x4): Likewise. (vld1q_f16_x4): Likewise. (vld1_f32_x4): Likewise. (vld1q_f32_x4): Likewise. (vld1_p8_x4): Likewise. 
(vld1q_p8_x4): Likewise. (vld1_p16_x4): Likewise. (vld1q_p16_x4): Likewise. (vld1_s64_x4): Likewise. (vld1_u64_x4): Likewise. (vld1_p64_x4): Likewise. (vld1q_s64_x4): Likewise. (vld1q_u64_x4): Likewise. (vld1q_p64_x4): Likewise. (vld1_f64_x4): Likewise. (vld1q_f64_x4): Likewise. (vld2_s64): Likewise. (vld2_u64): Likewise. (vld2_f64): Likewise. (vld2_s8): Likewise. (vld2_p8): Likewise. (vld2_p64): Likewise. (vld2_s16): Likewise. (vld2_p16): Likewise. (vld2_s32): Likewise. (vld2_u8): Likewise. (vld2_u16): Likewise. (vld2_u32): Likewise. (vld2_f16): Likewise. (vld2_f32): Likewise. (vld2q_s8): Likewise. (vld2q_p8): Likewise. (vld2q_s16): Likewise. (vld2q_p16): Likewise. (vld2q_p64): Likewise. (vld2q_s32): Likewise. (vld2q_s64): Likewise. (vld2q_u8): Likewise. (vld2q_u16): Likewise. (vld2q_u32): Likewise. (vld2q_u64): Likewise. (vld2q_f16): Likewise. (vld2q_f32): Likewise. (vld2q_f64): Likewise. (vld3_s64): Likewise. (vld3_u64): Likewise. (vld3_f64): Likewise. (vld3_s8): Likewise. (vld3_p8): Likewise. (vld3_s16): Likewise. (vld3_p16): Likewise. (vld3_s32): Likewise. (vld3_u8): Likewise. (vld3_u16): Likewise. (vld3_u32): Likewise. (vld3_f16): Likewise. (vld3_f32): Likewise. (vld3_p64): Likewise. (vld3q_s8): Likewise. (vld3q_p8): Likewise. (vld3q_s16): Likewise. (vld3q_p16): Likewise. (vld3q_s32): Likewise. (vld3q_s64): Likewise. (vld3q_u8): Likewise. (vld3q_u16): Likewise. (vld3q_u32): Likewise. (vld3q_u64): Likewise. (vld3q_f16): Likewise. (vld3q_f32): Likewise. (vld3q_f64): Likewise. (vld3q_p64): Likewise. (vld4_s64): Likewise. (vld4_u64): Likewise. (vld4_f64): Likewise. (vld4_s8): Likewise. (vld4_p8): Likewise. (vld4_s16): Likewise. (vld4_p16): Likewise. (vld4_s32): Likewise. (vld4_u8): Likewise. (vld4_u16): Likewise. (vld4_u32): Likewise. (vld4_f16): Likewise. (vld4_f32): Likewise. (vld4_p64): Likewise. (vld4q_s8): Likewise. (vld4q_p8): Likewise. (vld4q_s16): Likewise. (vld4q_p16): Likewise. (vld4q_s32): Likewise. (vld4q_s64): Likewise. (vld4q_u8): Likewise. (vld4q_u16): Likewise. (vld4q_u32): Likewise. (vld4q_u64): Likewise. (vld4q_f16): Likewise. (vld4q_f32): Likewise. (vld4q_f64): Likewise. (vld4q_p64): Likewise. (vld2_dup_s8): Likewise. (vld2_dup_s16): Likewise. (vld2_dup_s32): Likewise. (vld2_dup_f16): Likewise. (vld2_dup_f32): Likewise. (vld2_dup_f64): Likewise. (vld2_dup_u8): Likewise. (vld2_dup_u16): Likewise. (vld2_dup_u32): Likewise. (vld2_dup_p8): Likewise. (vld2_dup_p16): Likewise. (vld2_dup_p64): Likewise. (vld2_dup_s64): Likewise. (vld2_dup_u64): Likewise. (vld2q_dup_s8): Likewise. (vld2q_dup_p8): Likewise. (vld2q_dup_s16): Likewise. (vld2q_dup_p16): Likewise. (vld2q_dup_s32): Likewise. (vld2q_dup_s64): Likewise. (vld2q_dup_u8): Likewise. (vld2q_dup_u16): Likewise. (vld2q_dup_u32): Likewise. (vld2q_dup_u64): Likewise. (vld2q_dup_f16): Likewise. (vld2q_dup_f32): Likewise. (vld2q_dup_f64): Likewise. (vld2q_dup_p64): Likewise. (vld3_dup_s64): Likewise. (vld3_dup_u64): Likewise. (vld3_dup_f64): Likewise. (vld3_dup_s8): Likewise. (vld3_dup_p8): Likewise. (vld3_dup_s16): Likewise. (vld3_dup_p16): Likewise. (vld3_dup_s32): Likewise. (vld3_dup_u8): Likewise. (vld3_dup_u16): Likewise. (vld3_dup_u32): Likewise. (vld3_dup_f16): Likewise. (vld3_dup_f32): Likewise. (vld3_dup_p64): Likewise. (vld3q_dup_s8): Likewise. (vld3q_dup_p8): Likewise. (vld3q_dup_s16): Likewise. (vld3q_dup_p16): Likewise. (vld3q_dup_s32): Likewise. (vld3q_dup_s64): Likewise. (vld3q_dup_u8): Likewise. (vld3q_dup_u16): Likewise. (vld3q_dup_u32): Likewise. (vld3q_dup_u64): Likewise. (vld3q_dup_f16): Likewise. 
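  /* The tuple mode holds <nregs> vectors, so the valid lane range is the
     tuple's total element count divided by <nregs>.  */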
aarch64_simd_lane_bounds (operands[2], 0,
GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
operands[1], operands[2]));
DONE;
})
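;; Expander for the aarch64_st1 builtins: store a single vector register
;; to memory.  Big-endian targets use the dedicated aarch64_be_st1 pattern
;; so that elements land in memory in array order; little-endian targets
;; can use a plain vector move.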
(define_expand "aarch64_st1<VALL_F16:mode>"
[(match_operand:DI 0 "register_operand")
(match_operand:VALL_F16 1 "register_operand")]
"TARGET_SIMD"
{
machine_mode mode = <VALL_F16:MODE>mode;
rtx mem = gen_rtx_MEM (mode, operands[0]);
if (BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
else
emit_move_insn (mem, operands[1]);
DONE;
})
;; Standard pattern name vec_init<mode><Vel>.
(define_expand "vec_init<mode><Vel>"
[(match_operand:VALL_F16 0 "register_operand")
(match_operand 1 "" "")]
"TARGET_SIMD"
{
aarch64_expand_vector_init (operands[0], operands[1]);
DONE;
})
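;; Standard pattern name vec_init<mode><Vhalf>: initialize a 128-bit
;; vector from a pair of 64-bit half vectors.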
(define_expand "vec_init<mode><Vhalf>"
[(match_operand:VQ_NO2E 0 "register_operand")
(match_operand 1 "" "")]
"TARGET_SIMD"
{
aarch64_expand_vector_init (operands[0], operands[1]);
DONE;
})
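;; Load a single element and replicate it into every lane (LD1R).
;; For example (illustrative), vld1q_dup_s32 can match this pattern
;; and emit:
;;   ld1r {v0.4s}, [x0]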
(define_insn "*aarch64_simd_ld1r<mode>"
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
(vec_duplicate:VALL_F16
(match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
"TARGET_SIMD"
"ld1r\\t{%0.<Vtype>}, %1"
[(set_attr "type" "neon_load1_all_lanes")]
)
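;; LD1 of two consecutive vectors into a register pair, backing the
;; vld1_*_x2 and vld1q_*_x2 intrinsics.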
(define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
[(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
(unspec:VSTRUCT_2QD [
(match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
UNSPEC_LD1))]
"TARGET_SIMD"
"ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
[(set_attr "type" "neon_load1_2reg<q>")]
)
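;; Floating-point reciprocal estimate (FRECPE).  The '@' prefix makes
;; genemit emit mode-parameterised helpers such as
;; gen_aarch64_frecpe (machine_mode, rtx, rtx), which the backend uses
;; when expanding the approximate-division sequence.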
(define_insn "@aarch64_frecpe<mode>"
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
(unspec:VHSDF_HSDF
[(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
UNSPEC_FRECPE))]
"TARGET_SIMD"
"frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
[(set_attr "type" "neon_fp_recpe_<stype><q>")]
)
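;; Floating-point reciprocal exponent (FRECPX) on a scalar operand,
;; backing vrecpxh_f16 and the corresponding f32/f64 intrinsics.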
(define_insn "aarch64_frecpx<mode>"
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
UNSPEC_FRECPX))]
"TARGET_SIMD"
"frecpx\t%<s>0, %<s>1"
[(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
)
(define_insn "@aarch64_frecps<mode>"
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
(unspec:VHSDF_HSDF
[(match_operand:VHSDF_HSDF 1 "register_operand" "w")
(match_operand:VHSDF_HSDF 2 "register_operand" "w")]
UNSPEC_FRECPS))]
"TARGET_SIMD"
"frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_fp_recps_<stype><q>")]
)
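
;; FRECPS performs one Newton-Raphson step for a reciprocal, computing
;; 2.0 - op1 * op2.  An illustrative C use (a sketch, assuming the usual
;; arm_neon.h mapping of vrecpeq_f32/vrecpsq_f32 onto FRECPE/FRECPS):
;;
;;   #include <arm_neon.h>
;;   /* One refinement step of an approximation to 1.0f / a, per lane.  */
;;   float32x4_t approx_recip (float32x4_t a)
;;   {
;;     float32x4_t x = vrecpeq_f32 (a);           /* initial estimate  */
;;     return vmulq_f32 (x, vrecpsq_f32 (a, x));  /* x * (2 - a * x)   */
;;   }
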
(define_insn "aarch64_urecpe<mode>"
[(set (match_operand:VDQ_SI 0 "register_operand" "=w")
(unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
UNSPEC_URECPE))]
"TARGET_SIMD"
"urecpe\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_recpe_<Vetype><q>")])

;; Standard pattern name vec_extract<mode><Vel>.
(define_expand "vec_extract<mode><Vel>"
[(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
(match_operand:VALL_F16 1 "register_operand")
(match_operand:SI 2 "immediate_operand")]
"TARGET_SIMD"
{
emit_insn
(gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
DONE;
})
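
;; An illustrative way to reach this expander from C (a sketch; the exact
;; code generated depends on the lane and on register allocation):
;;
;;   #include <arm_neon.h>
;;   int32_t lane2 (int32x4_t v) { return vgetq_lane_s32 (v, 2); }
;;
;; The expander simply forwards to aarch64_get_lane<mode>; operand 0's
;; aarch64_simd_nonimmediate_operand predicate allows both register and
;; memory destinations.
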
;; Extract a 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extract<mode><Vhalf>"
[(match_operand:<VHALF> 0 "register_operand")
(match_operand:VQMOV_NO2E 1 "register_operand")
(match_operand 2 "immediate_operand")]
"TARGET_SIMD"
{
int start = INTVAL (operands[2]);
if (start != 0 && start != <nunits> / 2)
FAIL;
rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
DONE;
})
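
;; For example, with V4SI operands (<nunits> == 4) only start == 0 and
;; start == 2 are accepted, selecting lanes {0,1} or {2,3}; any other
;; index FAILs and the middle end falls back to extracting elements
;; individually.  The stepped parallel built here, e.g.
;; [(const_int 2) (const_int 3)] for the high half, is what
;; aarch64_get_half<mode> matches.
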
;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extractv2dfv1df"
[(match_operand:V1DF 0 "register_operand")
(match_operand:V2DF 1 "register_operand")
(match_operand 2 "immediate_operand")]
"TARGET_SIMD"
{
/* V1DF is rarely used by other patterns, so it is better to hide it in
   a subreg destination of a normal DF op.  */
rtx scalar0 = gen_lowpart (DFmode, operands[0]);
emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
DONE;
})

;; aes
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
[(set (match_operand:V16QI 0 "register_operand" "=w")
(unspec:V16QI
[(xor:V16QI
(match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "register_operand" "w"))]
CRYPTO_AES))]
"TARGET_SIMD && TARGET_AES"
"aes<aes_op>\\t%0.16b, %2.16b"
[(set_attr "type" "crypto_aese")]
)
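
;; Because the XOR of the data with the round key is part of the pattern
;; above, combine can fold an explicit EOR into the AES instruction.  An
;; illustrative example: with a zeroed key,
;;
;;   data = data ^ key;
;;   data = vaeseq_u8 (data, zero);
;;
;; should generate the same single AESE as data = vaeseq_u8 (data, key).
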
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
[(set (match_operand:V16QI 0 "register_operand" "=w")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
CRYPTO_AESMC))]
"TARGET_SIMD && TARGET_AES"
"aes<aesmc_op>\\t%0.16b, %1.16b"
[(set_attr "type" "crypto_aesmc")]
)

;; When AESE/AESMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.
(define_insn "*aarch64_crypto_aese_fused"
[(set (match_operand:V16QI 0 "register_operand" "=w")
(unspec:V16QI
[(unspec:V16QI
[(xor:V16QI
(match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "register_operand" "w"))]
UNSPEC_AESE)]
UNSPEC_AESMC))]
"TARGET_SIMD && TARGET_AES
&& aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
"aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
[(set_attr "type" "crypto_aese")
(set_attr "length" "8")]
)
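
;; An illustrative C sequence that combine can match against this fused
;; pattern (a sketch, assuming the usual arm_neon.h intrinsics):
;;
;;   #include <arm_neon.h>
;;   uint8x16_t enc_round (uint8x16_t data, uint8x16_t key)
;;   {
;;     return vaesmcq_u8 (vaeseq_u8 (data, key));
;;   }
;;
;; which emits the back-to-back "aese ; aesmc" pair shown above.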

;; When AESD/AESIMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.
(define_insn "*aarch64_crypto_aesd_fused"
[(set (match_operand:V16QI 0 "register_operand" "=w")
(unspec:V16QI
[(unspec:V16QI
[(xor:V16QI
(match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "register_operand" "w"))]
UNSPEC_AESD)]
UNSPEC_AESIMC))]
"TARGET_SIMD && TARGET_AES
&& aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
"aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
[(set_attr "type" "crypto_aese")
(set_attr "length" "8")]
)
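
;; The decryption analogue (a sketch mirroring the AESE/AESMC example):
;;
;;   #include <arm_neon.h>
;;   uint8x16_t dec_round (uint8x16_t data, uint8x16_t key)
;;   {
;;     return vaesimcq_u8 (vaesdq_u8 (data, key));
;;   }
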
;; sha1
(define_insn "aarch64_crypto_sha1hsi"
[(set (match_operand:SI 0 "register_operand" "=w")
(unspec:SI [(match_operand:SI 1 "register_operand" "w")]
UNSPEC_SHA1H))]
"TARGET_SIMD && TARGET_SHA2"
"sha1h\\t%s0, %s1"
[(set_attr "type" "crypto_sha1_fast")]
)
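
;; SHA1H fixed rotate applied to the low lane of a V4SI value.  On
;; little-endian targets memory lane 0 is register element 0, hence the
;; vec_select of (const_int 0) below.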
(define_insn "aarch64_crypto_sha1hv4si"
[(set (match_operand:SI 0 "register_operand" "=w")
(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
(parallel [(const_int 0)]))]
UNSPEC_SHA1H))]
"TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
"sha1h\\t%s0, %s1"
[(set_attr "type" "crypto_sha1_fast")]
)
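
;; Big-endian counterpart of the pattern above: the element order within
;; the register is reversed, so memory lane 0 is register element 3.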
(define_insn "aarch64_be_crypto_sha1hv4si"
[(set (match_operand:SI 0 "register_operand" "=w")
(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
(parallel [(const_int 3)]))]
UNSPEC_SHA1H))]
"TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
"sha1h\\t%s0, %s1"
[(set_attr "type" "crypto_sha1_fast")]
)
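
;; SHA1SU1: second half of the SHA-1 message schedule update.  Operand 1
;; is tied to the destination (constraint "0"), matching the
;; read-modify-write behaviour of the instruction.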
(define_insn "aarch64_crypto_sha1su1v4si"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")]
UNSPEC_SHA1SU1))]
aarch64-modes.def (V2HF): New VECTOR_MODE. 2018-01-10 Michael Collison <michael.collison@arm.com> * config/aarch64/aarch64-modes.def (V2HF): New VECTOR_MODE. * config/aarch64/aarch64-option-extension.def: Add AARCH64_OPT_EXTENSION of 'fp16fml'. * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): (__ARM_FEATURE_FP16_FML): Define if TARGET_F16FML is true. * config/aarch64/predicates.md (aarch64_lane_imm3): New predicate. * config/aarch64/constraints.md (Ui7): New constraint. * config/aarch64/iterators.md (VFMLA_W): New mode iterator. (VFMLA_SEL_W): Ditto. (f16quad): Ditto. (f16mac1): Ditto. (VFMLA16_LOW): New int iterator. (VFMLA16_HIGH): Ditto. (UNSPEC_FMLAL): New unspec. (UNSPEC_FMLSL): Ditto. (UNSPEC_FMLAL2): Ditto. (UNSPEC_FMLSL2): Ditto. (f16mac): New code attribute. * config/aarch64/aarch64-simd-builtins.def (aarch64_fmlal_lowv2sf): Ditto. (aarch64_fmlsl_lowv2sf): Ditto. (aarch64_fmlalq_lowv4sf): Ditto. (aarch64_fmlslq_lowv4sf): Ditto. (aarch64_fmlal_highv2sf): Ditto. (aarch64_fmlsl_highv2sf): Ditto. (aarch64_fmlalq_highv4sf): Ditto. (aarch64_fmlslq_highv4sf): Ditto. (aarch64_fmlal_lane_lowv2sf): Ditto. (aarch64_fmlsl_lane_lowv2sf): Ditto. (aarch64_fmlal_laneq_lowv2sf): Ditto. (aarch64_fmlsl_laneq_lowv2sf): Ditto. (aarch64_fmlalq_lane_lowv4sf): Ditto. (aarch64_fmlsl_lane_lowv4sf): Ditto. (aarch64_fmlalq_laneq_lowv4sf): Ditto. (aarch64_fmlsl_laneq_lowv4sf): Ditto. (aarch64_fmlal_lane_highv2sf): Ditto. (aarch64_fmlsl_lane_highv2sf): Ditto. (aarch64_fmlal_laneq_highv2sf): Ditto. (aarch64_fmlsl_laneq_highv2sf): Ditto. (aarch64_fmlalq_lane_highv4sf): Ditto. (aarch64_fmlsl_lane_highv4sf): Ditto. (aarch64_fmlalq_laneq_highv4sf): Ditto. (aarch64_fmlsl_laneq_highv4sf): Ditto. * config/aarch64/aarch64-simd.md: (aarch64_fml<f16mac1>l<f16quad>_low<mode>): New pattern. (aarch64_fml<f16mac1>l<f16quad>_high<mode>): Ditto. (aarch64_simd_fml<f16mac1>l<f16quad>_low<mode>): Ditto. (aarch64_simd_fml<f16mac1>l<f16quad>_high<mode>): Ditto. (aarch64_fml<f16mac1>l_lane_lowv2sf): Ditto. (aarch64_fml<f16mac1>l_lane_highv2sf): Ditto. (aarch64_simd_fml<f16mac>l_lane_lowv2sf): Ditto. (aarch64_simd_fml<f16mac>l_lane_highv2sf): Ditto. (aarch64_fml<f16mac1>lq_laneq_lowv4sf): Ditto. (aarch64_fml<f16mac1>lq_laneq_highv4sf): Ditto. (aarch64_simd_fml<f16mac>lq_laneq_lowv4sf): Ditto. (aarch64_simd_fml<f16mac>lq_laneq_highv4sf): Ditto. (aarch64_fml<f16mac1>l_laneq_lowv2sf): Ditto. (aarch64_fml<f16mac1>l_laneq_highv2sf): Ditto. (aarch64_simd_fml<f16mac>l_laneq_lowv2sf): Ditto. (aarch64_simd_fml<f16mac>l_laneq_highv2sf): Ditto. (aarch64_fml<f16mac1>lq_lane_lowv4sf): Ditto. (aarch64_fml<f16mac1>lq_lane_highv4sf): Ditto. (aarch64_simd_fml<f16mac>lq_lane_lowv4sf): Ditto. (aarch64_simd_fml<f16mac>lq_lane_highv4sf): Ditto. * config/aarch64/arm_neon.h (vfmlal_low_u32): New intrinsic. (vfmlsl_low_u32): Ditto. (vfmlalq_low_u32): Ditto. (vfmlslq_low_u32): Ditto. (vfmlal_high_u32): Ditto. (vfmlsl_high_u32): Ditto. (vfmlalq_high_u32): Ditto. (vfmlslq_high_u32): Ditto. (vfmlal_lane_low_u32): Ditto. (vfmlsl_lane_low_u32): Ditto. (vfmlal_laneq_low_u32): Ditto. (vfmlsl_laneq_low_u32): Ditto. (vfmlalq_lane_low_u32): Ditto. (vfmlslq_lane_low_u32): Ditto. (vfmlalq_laneq_low_u32): Ditto. (vfmlslq_laneq_low_u32): Ditto. (vfmlal_lane_high_u32): Ditto. (vfmlsl_lane_high_u32): Ditto. (vfmlal_laneq_high_u32): Ditto. (vfmlsl_laneq_high_u32): Ditto. (vfmlalq_lane_high_u32): Ditto. (vfmlslq_lane_high_u32): Ditto. (vfmlalq_laneq_high_u32): Ditto. (vfmlslq_laneq_high_u32): Ditto. * config/aarch64/aarch64.h (AARCH64_FL_F16SML): New flag. 
(AARCH64_FL_FOR_ARCH8_4): New. (AARCH64_ISA_F16FML): New ISA flag. (TARGET_F16FML): New feature flag for fp16fml. (doc/invoke.texi): Document new fp16fml option. 2018-01-10 Michael Collison <michael.collison@arm.com> * config/aarch64/aarch64-builtins.c: (aarch64_types_ternopu_imm_qualifiers, TYPES_TERNOPUI): New. * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): (__ARM_FEATURE_SHA3): Define if TARGET_SHA3 is true. * config/aarch64/aarch64.h (AARCH64_FL_SHA3): New flags. (AARCH64_ISA_SHA3): New ISA flag. (TARGET_SHA3): New feature flag for sha3. * config/aarch64/iterators.md (sha512_op): New int attribute. (CRYPTO_SHA512): New int iterator. (UNSPEC_SHA512H): New unspec. (UNSPEC_SHA512H2): Ditto. (UNSPEC_SHA512SU0): Ditto. (UNSPEC_SHA512SU1): Ditto. * config/aarch64/aarch64-simd-builtins.def (aarch64_crypto_sha512hqv2di): New builtin. (aarch64_crypto_sha512h2qv2di): Ditto. (aarch64_crypto_sha512su0qv2di): Ditto. (aarch64_crypto_sha512su1qv2di): Ditto. (aarch64_eor3qv8hi): Ditto. (aarch64_rax1qv2di): Ditto. (aarch64_xarqv2di): Ditto. (aarch64_bcaxqv8hi): Ditto. * config/aarch64/aarch64-simd.md: (aarch64_crypto_sha512h<sha512_op>qv2di): New pattern. (aarch64_crypto_sha512su0qv2di): Ditto. (aarch64_crypto_sha512su1qv2di): Ditto. (aarch64_eor3qv8hi): Ditto. (aarch64_rax1qv2di): Ditto. (aarch64_xarqv2di): Ditto. (aarch64_bcaxqv8hi): Ditto. * config/aarch64/arm_neon.h (vsha512hq_u64): New intrinsic. (vsha512h2q_u64): Ditto. (vsha512su0q_u64): Ditto. (vsha512su1q_u64): Ditto. (veor3q_u16): Ditto. (vrax1q_u64): Ditto. (vxarq_u64): Ditto. (vbcaxq_u16): Ditto. * config/arm/types.md (crypto_sha512): New type attribute. (crypto_sha3): Ditto. (doc/invoke.texi): Document new sha3 option. 2018-01-10 Michael Collison <michael.collison@arm.com> * config/aarch64/aarch64-builtins.c: (aarch64_types_quadopu_imm_qualifiers, TYPES_QUADOPUI): New. * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): (__ARM_FEATURE_SM3): Define if TARGET_SM4 is true. (__ARM_FEATURE_SM4): Define if TARGET_SM4 is true. * config/aarch64/aarch64.h (AARCH64_FL_SM4): New flags. (AARCH64_ISA_SM4): New ISA flag. (TARGET_SM4): New feature flag for sm4. * config/aarch64/aarch64-simd-builtins.def (aarch64_sm3ss1qv4si): Ditto. (aarch64_sm3tt1aq4si): Ditto. (aarch64_sm3tt1bq4si): Ditto. (aarch64_sm3tt2aq4si): Ditto. (aarch64_sm3tt2bq4si): Ditto. (aarch64_sm3partw1qv4si): Ditto. (aarch64_sm3partw2qv4si): Ditto. (aarch64_sm4eqv4si): Ditto. (aarch64_sm4ekeyqv4si): Ditto. * config/aarch64/aarch64-simd.md: (aarch64_sm3ss1qv4si): Ditto. (aarch64_sm3tt<sm3tt_op>qv4si): Ditto. (aarch64_sm3partw<sm3part_op>qv4si): Ditto. (aarch64_sm4eqv4si): Ditto. (aarch64_sm4ekeyqv4si): Ditto. * config/aarch64/iterators.md (sm3tt_op): New int iterator. (sm3part_op): Ditto. (CRYPTO_SM3TT): Ditto. (CRYPTO_SM3PART): Ditto. (UNSPEC_SM3SS1): New unspec. (UNSPEC_SM3TT1A): Ditto. (UNSPEC_SM3TT1B): Ditto. (UNSPEC_SM3TT2A): Ditto. (UNSPEC_SM3TT2B): Ditto. (UNSPEC_SM3PARTW1): Ditto. (UNSPEC_SM3PARTW2): Ditto. (UNSPEC_SM4E): Ditto. (UNSPEC_SM4EKEY): Ditto. * config/aarch64/constraints.md (Ui2): New constraint. * config/aarch64/predicates.md (aarch64_imm2): New predicate. * config/arm/types.md (crypto_sm3): New type attribute. (crypto_sm4): Ditto. * config/aarch64/arm_neon.h (vsm3ss1q_u32): New intrinsic. (vsm3tt1aq_u32): Ditto. (vsm3tt1bq_u32): Ditto. (vsm3tt2aq_u32): Ditto. (vsm3tt2bq_u32): Ditto. (vsm3partw1q_u32): Ditto. (vsm3partw2q_u32): Ditto. (vsm4eq_u32): Ditto. (vsm4ekeyq_u32): Ditto. (doc/invoke.texi): Document new sm4 option. 
2018-01-10 Michael Collison <michael.collison@arm.com> * config/aarch64/aarch64-arches.def (armv8.4-a): New architecture. * config/aarch64/aarch64.h (AARCH64_ISA_V8_4): New ISA flag. (AARCH64_FL_FOR_ARCH8_4): New. (AARCH64_FL_V8_4): New flag. (doc/invoke.texi): Document new armv8.4-a option. 2018-01-10 Michael Collison <michael.collison@arm.com> * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): (__ARM_FEATURE_AES): Define if TARGET_AES is true. (__ARM_FEATURE_SHA2): Define if TARGET_SHA2 is true. * config/aarch64/aarch64-option-extension.def: Add AARCH64_OPT_EXTENSION of 'sha2'. (aes): Add AARCH64_OPT_EXTENSION of 'aes'. (crypto): Disable sha2 and aes if crypto disabled. (crypto): Enable aes and sha2 if enabled. (simd): Disable sha2 and aes if simd disabled. * config/aarch64/aarch64.h (AARCH64_FL_AES, AARCH64_FL_SHA2): New flags. (AARCH64_ISA_AES, AARCH64_ISA_SHA2): New ISA flags. (TARGET_SHA2): New feature flag for sha2. (TARGET_AES): New feature flag for aes. * config/aarch64/aarch64-simd.md: (aarch64_crypto_aes<aes_op>v16qi): Make pattern conditional on TARGET_AES. (aarch64_crypto_aes<aesmc_op>v16qi): Ditto. (aarch64_crypto_sha1hsi): Make pattern conditional on TARGET_SHA2. (aarch64_crypto_sha1hv4si): Ditto. (aarch64_be_crypto_sha1hv4si): Ditto. (aarch64_crypto_sha1su1v4si): Ditto. (aarch64_crypto_sha1<sha1_op>v4si): Ditto. (aarch64_crypto_sha1su0v4si): Ditto. (aarch64_crypto_sha256h<sha256_op>v4si): Ditto. (aarch64_crypto_sha256su0v4si): Ditto. (aarch64_crypto_sha256su1v4si): Ditto. (doc/invoke.texi): Document new aes and sha2 options. From-SVN: r256478
2018-01-11 07:04:17 +01:00
"TARGET_SIMD && TARGET_SHA2"
"sha1su1\\t%0.4s, %2.4s"
[(set_attr "type" "crypto_sha1_fast")]
)
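
;; SHA1C, SHA1M and SHA1P hash-update instructions, generated from the
;; CRYPTO_SHA1 int iterator via the <sha1_op> attribute.  Operand 0 holds
;; the 128-bit hash state, %s2 the working scalar and operand 3 the
;; schedule input.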
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
CRYPTO_SHA1))]
"TARGET_SIMD && TARGET_SHA2"
"sha1<sha1_op>\\t%q0, %s2, %3.4s"
[(set_attr "type" "crypto_sha1_slow")]
)
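
;; SHA1SU0: first half of the SHA-1 message schedule update, combining
;; three schedule vectors; operand 1 is again tied to the destination.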
(define_insn "aarch64_crypto_sha1su0v4si"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
UNSPEC_SHA1SU0))]
aarch64-modes.def (V2HF): New VECTOR_MODE. 2018-01-10 Michael Collison <michael.collison@arm.com> * config/aarch64/aarch64-modes.def (V2HF): New VECTOR_MODE. * config/aarch64/aarch64-option-extension.def: Add AARCH64_OPT_EXTENSION of 'fp16fml'. * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): (__ARM_FEATURE_FP16_FML): Define if TARGET_F16FML is true. * config/aarch64/predicates.md (aarch64_lane_imm3): New predicate. * config/aarch64/constraints.md (Ui7): New constraint. * config/aarch64/iterators.md (VFMLA_W): New mode iterator. (VFMLA_SEL_W): Ditto. (f16quad): Ditto. (f16mac1): Ditto. (VFMLA16_LOW): New int iterator. (VFMLA16_HIGH): Ditto. (UNSPEC_FMLAL): New unspec. (UNSPEC_FMLSL): Ditto. (UNSPEC_FMLAL2): Ditto. (UNSPEC_FMLSL2): Ditto. (f16mac): New code attribute. * config/aarch64/aarch64-simd-builtins.def (aarch64_fmlal_lowv2sf): Ditto. (aarch64_fmlsl_lowv2sf): Ditto. (aarch64_fmlalq_lowv4sf): Ditto. (aarch64_fmlslq_lowv4sf): Ditto. (aarch64_fmlal_highv2sf): Ditto. (aarch64_fmlsl_highv2sf): Ditto. (aarch64_fmlalq_highv4sf): Ditto. (aarch64_fmlslq_highv4sf): Ditto. (aarch64_fmlal_lane_lowv2sf): Ditto. (aarch64_fmlsl_lane_lowv2sf): Ditto. (aarch64_fmlal_laneq_lowv2sf): Ditto. (aarch64_fmlsl_laneq_lowv2sf): Ditto. (aarch64_fmlalq_lane_lowv4sf): Ditto. (aarch64_fmlsl_lane_lowv4sf): Ditto. (aarch64_fmlalq_laneq_lowv4sf): Ditto. (aarch64_fmlsl_laneq_lowv4sf): Ditto. (aarch64_fmlal_lane_highv2sf): Ditto. (aarch64_fmlsl_lane_highv2sf): Ditto. (aarch64_fmlal_laneq_highv2sf): Ditto. (aarch64_fmlsl_laneq_highv2sf): Ditto. (aarch64_fmlalq_lane_highv4sf): Ditto. (aarch64_fmlsl_lane_highv4sf): Ditto. (aarch64_fmlalq_laneq_highv4sf): Ditto. (aarch64_fmlsl_laneq_highv4sf): Ditto. * config/aarch64/aarch64-simd.md: (aarch64_fml<f16mac1>l<f16quad>_low<mode>): New pattern. (aarch64_fml<f16mac1>l<f16quad>_high<mode>): Ditto. (aarch64_simd_fml<f16mac1>l<f16quad>_low<mode>): Ditto. (aarch64_simd_fml<f16mac1>l<f16quad>_high<mode>): Ditto. (aarch64_fml<f16mac1>l_lane_lowv2sf): Ditto. (aarch64_fml<f16mac1>l_lane_highv2sf): Ditto. (aarch64_simd_fml<f16mac>l_lane_lowv2sf): Ditto. (aarch64_simd_fml<f16mac>l_lane_highv2sf): Ditto. (aarch64_fml<f16mac1>lq_laneq_lowv4sf): Ditto. (aarch64_fml<f16mac1>lq_laneq_highv4sf): Ditto. (aarch64_simd_fml<f16mac>lq_laneq_lowv4sf): Ditto. (aarch64_simd_fml<f16mac>lq_laneq_highv4sf): Ditto. (aarch64_fml<f16mac1>l_laneq_lowv2sf): Ditto. (aarch64_fml<f16mac1>l_laneq_highv2sf): Ditto. (aarch64_simd_fml<f16mac>l_laneq_lowv2sf): Ditto. (aarch64_simd_fml<f16mac>l_laneq_highv2sf): Ditto. (aarch64_fml<f16mac1>lq_lane_lowv4sf): Ditto. (aarch64_fml<f16mac1>lq_lane_highv4sf): Ditto. (aarch64_simd_fml<f16mac>lq_lane_lowv4sf): Ditto. (aarch64_simd_fml<f16mac>lq_lane_highv4sf): Ditto. * config/aarch64/arm_neon.h (vfmlal_low_u32): New intrinsic. (vfmlsl_low_u32): Ditto. (vfmlalq_low_u32): Ditto. (vfmlslq_low_u32): Ditto. (vfmlal_high_u32): Ditto. (vfmlsl_high_u32): Ditto. (vfmlalq_high_u32): Ditto. (vfmlslq_high_u32): Ditto. (vfmlal_lane_low_u32): Ditto. (vfmlsl_lane_low_u32): Ditto. (vfmlal_laneq_low_u32): Ditto. (vfmlsl_laneq_low_u32): Ditto. (vfmlalq_lane_low_u32): Ditto. (vfmlslq_lane_low_u32): Ditto. (vfmlalq_laneq_low_u32): Ditto. (vfmlslq_laneq_low_u32): Ditto. (vfmlal_lane_high_u32): Ditto. (vfmlsl_lane_high_u32): Ditto. (vfmlal_laneq_high_u32): Ditto. (vfmlsl_laneq_high_u32): Ditto. (vfmlalq_lane_high_u32): Ditto. (vfmlslq_lane_high_u32): Ditto. (vfmlalq_laneq_high_u32): Ditto. (vfmlslq_laneq_high_u32): Ditto. * config/aarch64/aarch64.h (AARCH64_FL_F16SML): New flag. 
(AARCH64_FL_FOR_ARCH8_4): New. (AARCH64_ISA_F16FML): New ISA flag. (TARGET_F16FML): New feature flag for fp16fml. (doc/invoke.texi): Document new fp16fml option. 2018-01-10 Michael Collison <michael.collison@arm.com> * config/aarch64/aarch64-builtins.c: (aarch64_types_ternopu_imm_qualifiers, TYPES_TERNOPUI): New. * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): (__ARM_FEATURE_SHA3): Define if TARGET_SHA3 is true. * config/aarch64/aarch64.h (AARCH64_FL_SHA3): New flags. (AARCH64_ISA_SHA3): New ISA flag. (TARGET_SHA3): New feature flag for sha3. * config/aarch64/iterators.md (sha512_op): New int attribute. (CRYPTO_SHA512): New int iterator. (UNSPEC_SHA512H): New unspec. (UNSPEC_SHA512H2): Ditto. (UNSPEC_SHA512SU0): Ditto. (UNSPEC_SHA512SU1): Ditto. * config/aarch64/aarch64-simd-builtins.def (aarch64_crypto_sha512hqv2di): New builtin. (aarch64_crypto_sha512h2qv2di): Ditto. (aarch64_crypto_sha512su0qv2di): Ditto. (aarch64_crypto_sha512su1qv2di): Ditto. (aarch64_eor3qv8hi): Ditto. (aarch64_rax1qv2di): Ditto. (aarch64_xarqv2di): Ditto. (aarch64_bcaxqv8hi): Ditto. * config/aarch64/aarch64-simd.md: (aarch64_crypto_sha512h<sha512_op>qv2di): New pattern. (aarch64_crypto_sha512su0qv2di): Ditto. (aarch64_crypto_sha512su1qv2di): Ditto. (aarch64_eor3qv8hi): Ditto. (aarch64_rax1qv2di): Ditto. (aarch64_xarqv2di): Ditto. (aarch64_bcaxqv8hi): Ditto. * config/aarch64/arm_neon.h (vsha512hq_u64): New intrinsic. (vsha512h2q_u64): Ditto. (vsha512su0q_u64): Ditto. (vsha512su1q_u64): Ditto. (veor3q_u16): Ditto. (vrax1q_u64): Ditto. (vxarq_u64): Ditto. (vbcaxq_u16): Ditto. * config/arm/types.md (crypto_sha512): New type attribute. (crypto_sha3): Ditto. (doc/invoke.texi): Document new sha3 option. 2018-01-10 Michael Collison <michael.collison@arm.com> * config/aarch64/aarch64-builtins.c: (aarch64_types_quadopu_imm_qualifiers, TYPES_QUADOPUI): New. * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): (__ARM_FEATURE_SM3): Define if TARGET_SM4 is true. (__ARM_FEATURE_SM4): Define if TARGET_SM4 is true. * config/aarch64/aarch64.h (AARCH64_FL_SM4): New flags. (AARCH64_ISA_SM4): New ISA flag. (TARGET_SM4): New feature flag for sm4. * config/aarch64/aarch64-simd-builtins.def (aarch64_sm3ss1qv4si): Ditto. (aarch64_sm3tt1aq4si): Ditto. (aarch64_sm3tt1bq4si): Ditto. (aarch64_sm3tt2aq4si): Ditto. (aarch64_sm3tt2bq4si): Ditto. (aarch64_sm3partw1qv4si): Ditto. (aarch64_sm3partw2qv4si): Ditto. (aarch64_sm4eqv4si): Ditto. (aarch64_sm4ekeyqv4si): Ditto. * config/aarch64/aarch64-simd.md: (aarch64_sm3ss1qv4si): Ditto. (aarch64_sm3tt<sm3tt_op>qv4si): Ditto. (aarch64_sm3partw<sm3part_op>qv4si): Ditto. (aarch64_sm4eqv4si): Ditto. (aarch64_sm4ekeyqv4si): Ditto. * config/aarch64/iterators.md (sm3tt_op): New int iterator. (sm3part_op): Ditto. (CRYPTO_SM3TT): Ditto. (CRYPTO_SM3PART): Ditto. (UNSPEC_SM3SS1): New unspec. (UNSPEC_SM3TT1A): Ditto. (UNSPEC_SM3TT1B): Ditto. (UNSPEC_SM3TT2A): Ditto. (UNSPEC_SM3TT2B): Ditto. (UNSPEC_SM3PARTW1): Ditto. (UNSPEC_SM3PARTW2): Ditto. (UNSPEC_SM4E): Ditto. (UNSPEC_SM4EKEY): Ditto. * config/aarch64/constraints.md (Ui2): New constraint. * config/aarch64/predicates.md (aarch64_imm2): New predicate. * config/arm/types.md (crypto_sm3): New type attribute. (crypto_sm4): Ditto. * config/aarch64/arm_neon.h (vsm3ss1q_u32): New intrinsic. (vsm3tt1aq_u32): Ditto. (vsm3tt1bq_u32): Ditto. (vsm3tt2aq_u32): Ditto. (vsm3tt2bq_u32): Ditto. (vsm3partw1q_u32): Ditto. (vsm3partw2q_u32): Ditto. (vsm4eq_u32): Ditto. (vsm4ekeyq_u32): Ditto. (doc/invoke.texi): Document new sm4 option. 
2018-01-10 Michael Collison <michael.collison@arm.com> * config/aarch64/aarch64-arches.def (armv8.4-a): New architecture. * config/aarch64/aarch64.h (AARCH64_ISA_V8_4): New ISA flag. (AARCH64_FL_FOR_ARCH8_4): New. (AARCH64_FL_V8_4): New flag. (doc/invoke.texi): Document new armv8.4-a option. 2018-01-10 Michael Collison <michael.collison@arm.com> * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): (__ARM_FEATURE_AES): Define if TARGET_AES is true. (__ARM_FEATURE_SHA2): Define if TARGET_SHA2 is true. * config/aarch64/aarch64-option-extension.def: Add AARCH64_OPT_EXTENSION of 'sha2'. (aes): Add AARCH64_OPT_EXTENSION of 'aes'. (crypto): Disable sha2 and aes if crypto disabled. (crypto): Enable aes and sha2 if enabled. (simd): Disable sha2 and aes if simd disabled. * config/aarch64/aarch64.h (AARCH64_FL_AES, AARCH64_FL_SHA2): New flags. (AARCH64_ISA_AES, AARCH64_ISA_SHA2): New ISA flags. (TARGET_SHA2): New feature flag for sha2. (TARGET_AES): New feature flag for aes. * config/aarch64/aarch64-simd.md: (aarch64_crypto_aes<aes_op>v16qi): Make pattern conditional on TARGET_AES. (aarch64_crypto_aes<aesmc_op>v16qi): Ditto. (aarch64_crypto_sha1hsi): Make pattern conditional on TARGET_SHA2. (aarch64_crypto_sha1hv4si): Ditto. (aarch64_be_crypto_sha1hv4si): Ditto. (aarch64_crypto_sha1su1v4si): Ditto. (aarch64_crypto_sha1<sha1_op>v4si): Ditto. (aarch64_crypto_sha1su0v4si): Ditto. (aarch64_crypto_sha256h<sha256_op>v4si): Ditto. (aarch64_crypto_sha256su0v4si): Ditto. (aarch64_crypto_sha256su1v4si): Ditto. (doc/invoke.texi): Document new aes and sha2 options. From-SVN: r256478
2018-01-11 07:04:17 +01:00
"TARGET_SIMD && TARGET_SHA2"
"sha1su0\\t%0.4s, %2.4s, %3.4s"
[(set_attr "type" "crypto_sha1_xor")]
)
;; sha256
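;; SHA256H and SHA256H2 hash-update instructions, generated from the
;; CRYPTO_SHA256 int iterator via the <sha256_op> attribute.  Both
;; quadword hash-state operands are used, so the template prints %q0
;; and %q2.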
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
CRYPTO_SHA256))]
"TARGET_SIMD && TARGET_SHA2"
"sha256h<sha256_op>\\t%q0, %q2, %3.4s"
[(set_attr "type" "crypto_sha256_slow")]
)
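
;; SHA256SU0: first half of the SHA-256 message schedule update; a
;; two-operand read-modify-write instruction, so operand 1 is tied to
;; the destination.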
(define_insn "aarch64_crypto_sha256su0v4si"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")]
UNSPEC_SHA256SU0))]
"TARGET_SIMD && TARGET_SHA2"
"sha256su0\\t%0.4s, %2.4s"
[(set_attr "type" "crypto_sha256_fast")]
)
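
;; SHA256SU1 performs the second half of the SHA-256 message-schedule
;; update.  The pattern ties operand 1 to the destination and reads two
;; further schedule vectors in operands 2 and 3.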
(define_insn "aarch64_crypto_sha256su1v4si"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
UNSPEC_SHA256SU1))]
"TARGET_SIMD && TARGET_SHA2"
"sha256su1\\t%0.4s, %2.4s, %3.4s"
[(set_attr "type" "crypto_sha256_slow")]
)
;; sha512
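;; The patterns below implement the Armv8.4-A SHA-512 instructions
;; SHA512H, SHA512H2, SHA512SU0 and SHA512SU1.  SHA512H and SHA512H2
;; share one pattern through the CRYPTO_SHA512 int iterator and reach
;; user code as the vsha512hq_u64 and vsha512h2q_u64 intrinsics.  A
;; minimal C sketch of the intended use (placeholder operand names,
;; assuming arm_neon.h and a target with the sha3 extension enabled):
;;
;;   uint64x2_t ab = vsha512hq_u64 (hash_ed, hash_gf, kwh);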
(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
[(set (match_operand:V2DI 0 "register_operand" "=w")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
(match_operand:V2DI 2 "register_operand" "w")
(match_operand:V2DI 3 "register_operand" "w")]
CRYPTO_SHA512))]
"TARGET_SIMD && TARGET_SHA3"
"sha512h<sha512_op>\\t%q0, %q2, %3.2d"
[(set_attr "type" "crypto_sha512")]
)
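
;; SHA512SU0 and SHA512SU1 are the two halves of the SHA-512
;; message-schedule update: SU0 combines two schedule vectors and SU1
;; three, with operand 1 tied to the destination in both.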
(define_insn "aarch64_crypto_sha512su0qv2di"
[(set (match_operand:V2DI 0 "register_operand" "=w")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
(match_operand:V2DI 2 "register_operand" "w")]
UNSPEC_SHA512SU0))]
"TARGET_SIMD && TARGET_SHA3"
"sha512su0\\t%0.2d, %2.2d"
[(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su1qv2di"
[(set (match_operand:V2DI 0 "register_operand" "=w")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
(match_operand:V2DI 2 "register_operand" "w")
(match_operand:V2DI 3 "register_operand" "w")]
UNSPEC_SHA512SU1))]
"TARGET_SIMD && TARGET_SHA3"
"sha512su1\\t%0.2d, %2.2d, %3.2d"
[(set_attr "type" "crypto_sha512")]
)

;; sha3
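;; The SHA-3 group provides EOR3 (three-way exclusive OR), RAX1 (rotate
;; left by one, then exclusive OR), XAR (exclusive OR, then rotate
;; right) and BCAX (bit clear and exclusive OR).  EOR3 and BCAX are
;; expressed with generic rtl codes rather than unspecs, so the combiner
;; can also form them from open-coded logical operations.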
(define_insn "eor3q<mode>4"
[(set (match_operand:VQ_I 0 "register_operand" "=w")
(xor:VQ_I
(xor:VQ_I
(match_operand:VQ_I 2 "register_operand" "w")
(match_operand:VQ_I 3 "register_operand" "w"))
(match_operand:VQ_I 1 "register_operand" "w")))]
"TARGET_SIMD && TARGET_SHA3"
"eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
[(set_attr "type" "crypto_sha3")]
)
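
;; A hedged C sketch of the matching intrinsic (veor3q_u16 shown; the
;; other element types follow the same shape):
;;
;;   uint16x8_t r = veor3q_u16 (a, b, c);   /* r = a ^ b ^ c */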
(define_insn "aarch64_rax1qv2di"
[(set (match_operand:V2DI 0 "register_operand" "=w")
(xor:V2DI
(rotate:V2DI
(match_operand:V2DI 2 "register_operand" "w")
(const_int 1))
(match_operand:V2DI 1 "register_operand" "w")))]
"TARGET_SIMD && TARGET_SHA3"
"rax1\\t%0.2d, %1.2d, %2.2d"
[(set_attr "type" "crypto_sha3")]
)
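
;; XAR exclusive-ORs the two vector inputs and rotates each 64-bit lane
;; right by the immediate in operand 3; the '%' modifier marks the XOR
;; operands as commutative.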
(define_insn "aarch64_xarqv2di"
[(set (match_operand:V2DI 0 "register_operand" "=w")
(rotatert:V2DI
(xor:V2DI
(match_operand:V2DI 1 "register_operand" "%w")
(match_operand:V2DI 2 "register_operand" "w"))
(match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
"TARGET_SIMD && TARGET_SHA3"
"xar\\t%0.2d, %1.2d, %2.2d, %3"
[(set_attr "type" "crypto_sha3")]
)
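
;; BCAX computes operand1 ^ (operand2 & ~operand3) ("bit clear and
;; XOR"); like EOR3 it uses generic rtl, so no unspec is needed.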
(define_insn "bcaxq<mode>4"
[(set (match_operand:VQ_I 0 "register_operand" "=w")
(xor:VQ_I
(and:VQ_I
(not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
(match_operand:VQ_I 2 "register_operand" "w"))
(match_operand:VQ_I 1 "register_operand" "w")))]
"TARGET_SIMD && TARGET_SHA3"
"bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
[(set_attr "type" "crypto_sha3")]
)

;; SM3
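;; The SM3 patterns implement the Armv8.4-A SM3 hash instructions.
;; SM3TT1A, SM3TT1B, SM3TT2A and SM3TT2B share one pattern through the
;; CRYPTO_SM3TT int iterator; their final operand is a 2-bit lane
;; immediate, enforced by the aarch64_imm2 predicate and Ui2 constraint.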
(define_insn "aarch64_sm3ss1qv4si"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
UNSPEC_SM3SS1))]
"TARGET_SIMD && TARGET_SM4"
"sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
[(set_attr "type" "crypto_sm3")]
)

(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")
(match_operand:SI 4 "aarch64_imm2" "Ui2")]
CRYPTO_SM3TT))]
"TARGET_SIMD && TARGET_SM4"
"sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
[(set_attr "type" "crypto_sm3")]
)

(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
CRYPTO_SM3PART))]
"TARGET_SIMD && TARGET_SM4"
"sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
[(set_attr "type" "crypto_sm3")]
)

;; SM4
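;; SM4E runs rounds of the SM4 block cipher on the data in operand 1
;; (tied to the destination) using the round keys in operand 2, while
;; SM4EKEY derives the next group of round keys; they reach user code
;; as the vsm4eq_u32 and vsm4ekeyq_u32 intrinsics.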
(define_insn "aarch64_sm4eqv4si"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")]
UNSPEC_SM4E))]
"TARGET_SIMD && TARGET_SM4"
"sm4e\\t%0.4s, %2.4s"
[(set_attr "type" "crypto_sm4")]
)

(define_insn "aarch64_sm4ekeyqv4si"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
(match_operand:V4SI 2 "register_operand" "w")]
UNSPEC_SM4EKEY))]
"TARGET_SIMD && TARGET_SM4"
"sm4ekey\\t%0.4s, %1.4s, %2.4s"
[(set_attr "type" "crypto_sm4")]
)

;; fp16fml
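;; The fp16fml patterns implement FMLAL and FMLSL, which multiply
;; half-precision elements and accumulate (or subtract) into
;; single-precision lanes.  The _low variants consume the bottom half of
;; the half-precision inputs and the _high variants the top half.  Each
;; user-level expander below is expected to forward to a matching
;; aarch64_simd_fml* insn.  A hedged C sketch, using this version's
;; intrinsic naming (placeholder operand names):
;;
;;   float32x2_t r = vfmlal_low_u32 (acc, x, y);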
(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
[(set (match_operand:VDQSF 0 "register_operand")
(unspec:VDQSF
[(match_operand:VDQSF 1 "register_operand")
(match_operand:<VFMLA_W> 2 "register_operand")
(match_operand:<VFMLA_W> 3 "register_operand")]
VFMLA16_LOW))]
"TARGET_F16FML"
{
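  /* Both multiplicands come from the low halves of the full-width
     inputs: build the two "low half" lane-selection parallels that the
     insn pattern below takes as operands 4 and 5.  */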
rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
<nunits> * 2, false);
rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
<nunits> * 2, false);
emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
operands[1],
operands[2],
operands[3],
p1, p2));
DONE;
})
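
;; As above, but for the "high" forms (FMLAL2/FMLSL2): the multiplicands
;; are taken from the high halves of the FP16 inputs.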
(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
[(set (match_operand:VDQSF 0 "register_operand")
(unspec:VDQSF
[(match_operand:VDQSF 1 "register_operand")
(match_operand:<VFMLA_W> 2 "register_operand")
(match_operand:<VFMLA_W> 3 "register_operand")]
VFMLA16_HIGH))]
"TARGET_F16FML"
{
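  /* As in the "low" expander above, but select the high halves.  */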
rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
operands[1],
operands[2],
operands[3],
p1, p2));
DONE;
})
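
;; FMLAL: extend the low-half FP16 elements of operands 2 and 3 to SF
;; and fused-multiply-accumulate them into the SF accumulator, operand 1.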
(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(fma:VDQSF
(float_extend:VDQSF
(vec_select:<VFMLA_SEL_W>
(match_operand:<VFMLA_W> 2 "register_operand" "w")
(match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
(float_extend:VDQSF
(vec_select:<VFMLA_SEL_W>
(match_operand:<VFMLA_W> 3 "register_operand" "w")
(match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
(match_operand:VDQSF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
[(set_attr "type" "neon_fp_mul_s")]
)
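
;; FMLSL: as FMLAL above, except that the first multiplicand is negated,
;; giving a widening fused multiply-subtract.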
(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(fma:VDQSF
(float_extend:VDQSF
(neg:<VFMLA_SEL_W>
(vec_select:<VFMLA_SEL_W>
(match_operand:<VFMLA_W> 2 "register_operand" "w")
(match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
(float_extend:VDQSF
(vec_select:<VFMLA_SEL_W>
(match_operand:<VFMLA_W> 3 "register_operand" "w")
(match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
(match_operand:VDQSF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
[(set_attr "type" "neon_fp_mul_s")]
)
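
;; FMLAL2: the same widening multiply-accumulate on the high halves of
;; the FP16 inputs.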
(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(fma:VDQSF
(float_extend:VDQSF
(vec_select:<VFMLA_SEL_W>
(match_operand:<VFMLA_W> 2 "register_operand" "w")
(match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
(float_extend:VDQSF
(vec_select:<VFMLA_SEL_W>
(match_operand:<VFMLA_W> 3 "register_operand" "w")
(match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
(match_operand:VDQSF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
[(set_attr "type" "neon_fp_mul_s")]
)
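
;; FMLSL2: the widening multiply-subtract on the high halves.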
(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(fma:VDQSF
(float_extend:VDQSF
(neg:<VFMLA_SEL_W>
(vec_select:<VFMLA_SEL_W>
(match_operand:<VFMLA_W> 2 "register_operand" "w")
(match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
(float_extend:VDQSF
(vec_select:<VFMLA_SEL_W>
(match_operand:<VFMLA_W> 3 "register_operand" "w")
(match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
(match_operand:VDQSF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
[(set_attr "type" "neon_fp_mul_s")]
)
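
;; By-element forms: multiply the low half of operand 2 by the single
;; FP16 element of operand 3 selected by immediate operand 4.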
(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
[(set (match_operand:V2SF 0 "register_operand")
(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
(match_operand:V4HF 2 "register_operand")
(match_operand:V4HF 3 "register_operand")
(match_operand:SI 4 "aarch64_imm2")]
VFMLA16_LOW))]
"TARGET_F16FML"
{
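  /* Select the low half of operand 2; the lane number is
     endian-corrected before being passed on to the insn.  */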
rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
operands[1],
operands[2],
operands[3],
p1, lane));
DONE;
}
)
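
;; As for the low-half expander above, but select the upper two lanes of
;; operand 2, so the insn emitted below is the FMLAL2/FMLSL2 by-element form.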
(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
[(set (match_operand:V2SF 0 "register_operand")
(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
(match_operand:V4HF 2 "register_operand")
(match_operand:V4HF 3 "register_operand")
(match_operand:SI 4 "aarch64_imm2")]
VFMLA16_HIGH))]
"TARGET_F16FML"
{
rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
operands[1],
operands[2],
operands[3],
p1, lane));
DONE;
})
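
;; FMLAL (by element): widen the low two HF lanes of operand 2 and a
;; broadcast lane of operand 3 to SF, multiply, and accumulate into
;; operand 1.  The "x" constraint keeps the indexed operand in V0-V15,
;; as the by-element encoding requires.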
(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=w")
(fma:V2SF
(float_extend:V2SF
(vec_select:V2HF
(match_operand:V4HF 2 "register_operand" "w")
(match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
(float_extend:V2SF
(vec_duplicate:V2HF
(vec_select:HF
(match_operand:V4HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
(match_operand:V2SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlal\\t%0.2s, %2.2h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
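
;; FMLSL (by element): as above, with the selected half negated.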
(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=w")
(fma:V2SF
(float_extend:V2SF
(neg:V2HF
(vec_select:V2HF
(match_operand:V4HF 2 "register_operand" "w")
(match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
(float_extend:V2SF
(vec_duplicate:V2HF
(vec_select:HF
(match_operand:V4HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
(match_operand:V2SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
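
;; FMLAL2 (by element): the same operation on the high half of operand 2.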
(define_insn "aarch64_simd_fmlal_lane_highv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=w")
(fma:V2SF
(float_extend:V2SF
(vec_select:V2HF
(match_operand:V4HF 2 "register_operand" "w")
(match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
(float_extend:V2SF
(vec_duplicate:V2HF
(vec_select:HF
(match_operand:V4HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
(match_operand:V2SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
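
;; FMLSL2 (by element): high half of operand 2, negated.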
(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=w")
(fma:V2SF
(float_extend:V2SF
(neg:V2HF
(vec_select:V2HF
(match_operand:V4HF 2 "register_operand" "w")
(match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
(float_extend:V2SF
(vec_duplicate:V2HF
(vec_select:HF
(match_operand:V4HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
(match_operand:V2SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
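
;; Quad-register forms indexed by a lane of a full V8HF vector (laneq).
;; The expanders build the parallel selecting the required half of the
;; multiplicand and an endian-corrected lane number, then emit the
;; *q_laneq insns below.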
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
[(set (match_operand:V4SF 0 "register_operand")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
(match_operand:V8HF 2 "register_operand")
(match_operand:V8HF 3 "register_operand")
(match_operand:SI 4 "aarch64_lane_imm3")]
VFMLA16_LOW))]
"TARGET_F16FML"
{
rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
operands[1],
operands[2],
operands[3],
p1, lane));
DONE;
})
(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
[(set (match_operand:V4SF 0 "register_operand")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
(match_operand:V8HF 2 "register_operand")
(match_operand:V8HF 3 "register_operand")
(match_operand:SI 4 "aarch64_lane_imm3")]
VFMLA16_HIGH))]
"TARGET_F16FML"
{
rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
operands[1],
operands[2],
operands[3],
p1, lane));
DONE;
})
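;; The four insns below are what the laneq expanders above (and the
;; corresponding "low" ones) generate: the first multiplicand is the
;; low or high half of a V8HF register, the second is a single
;; half-precision lane duplicated across the vector, and both inputs
;; are widened to SF before being fused into the V4SF accumulator.
;;
;; A rough semantics sketch in C of the low-half FMLAL-by-element form
;; (an illustration only, not part of the port; the function name is
;; made up):
;;
;;   #include <math.h>
;;
;;   static void
;;   fmlalq_laneq_low_sketch (float r[4], const _Float16 a[8],
;;                            const _Float16 b[8], int lane)
;;   {
;;     /* a[0..3] is the low half of the V8HF multiplicand; lane may
;;        be 0..7, as the aarch64_lane_imm3 predicate allows.  */
;;     for (int i = 0; i < 4; i++)
;;       r[i] = fmaf ((float) a[i], (float) b[lane], r[i]);
;;   }
;;
;; The FMLSL forms negate the half-precision multiplicand first, and
;; the *_high* forms read a[4..7] instead of a[0..3].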
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(fma:V4SF
(float_extend:V4SF
(vec_select:V4HF
(match_operand:V8HF 2 "register_operand" "w")
(match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
(float_extend:V4SF
(vec_duplicate:V4HF
(vec_select:HF
(match_operand:V8HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
(match_operand:V4SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlal\\t%0.4s, %2.4h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(fma:V4SF
(float_extend:V4SF
(neg:V4HF
(vec_select:V4HF
(match_operand:V8HF 2 "register_operand" "w")
(match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
(float_extend:V4SF
(vec_duplicate:V4HF
(vec_select:HF
(match_operand:V8HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
(match_operand:V4SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(fma:V4SF
(float_extend:V4SF
(vec_select:V4HF
(match_operand:V8HF 2 "register_operand" "w")
(match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
(float_extend:V4SF
(vec_duplicate:V4HF
(vec_select:HF
(match_operand:V8HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
(match_operand:V4SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(fma:V4SF
(float_extend:V4SF
(neg:V4HF
(vec_select:V4HF
(match_operand:V8HF 2 "register_operand" "w")
(match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
(float_extend:V4SF
(vec_duplicate:V4HF
(vec_select:HF
(match_operand:V8HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
(match_operand:V4SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
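;; The expanders below implement the V2SF laneq builtins.  Each one
;; rebuilds the explicit RTL that the insns further down expect:
;; operand 4, the lane number seen by the builtin, is remapped with
;; aarch64_endian_lane_rtx so that the same architectural lane is
;; selected on big- and little-endian, and a parallel selecting the
;; low or high half of the multiplicand is built with
;; aarch64_simd_vect_par_cnst_half.  For example, on little-endian the
;; low-half selector for V4HF is
;;
;;   (parallel [(const_int 0) (const_int 1)])
;;
;; which is what the vect_par_cnst_lo_half predicate on operand 4 of
;; the matching insns accepts.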
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
[(set (match_operand:V2SF 0 "register_operand")
(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
(match_operand:V4HF 2 "register_operand")
(match_operand:V8HF 3 "register_operand")
(match_operand:SI 4 "aarch64_lane_imm3")]
VFMLA16_LOW))]
"TARGET_F16FML"
{
rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
operands[1],
operands[2],
operands[3],
p1, lane));
DONE;
})
(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
[(set (match_operand:V2SF 0 "register_operand")
(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
(match_operand:V4HF 2 "register_operand")
(match_operand:V8HF 3 "register_operand")
(match_operand:SI 4 "aarch64_lane_imm3")]
VFMLA16_HIGH))]
"TARGET_F16FML"
{
rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
operands[1],
operands[2],
operands[3],
p1, lane));
DONE;
})
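;; The matching D-register insns: a .2s accumulator, the .2h low or
;; high half of a V4HF multiplicand, and a lane of a V8HF register.
;; A rough C sketch of the low-half FMLSL-by-element form (again an
;; illustration only, with a made-up function name):
;;
;;   #include <math.h>
;;
;;   static void
;;   fmlsl_laneq_low_sketch (float r[2], const _Float16 a[4],
;;                           const _Float16 b[8], int lane)
;;   {
;;     /* Negating the half-precision input before widening, as the
;;        (neg:V2HF ...) under float_extend does, is equivalent to
;;        negating the widened value here.  */
;;     for (int i = 0; i < 2; i++)
;;       r[i] = fmaf (-(float) a[i], (float) b[lane], r[i]);
;;   }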
(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=w")
(fma:V2SF
(float_extend:V2SF
(vec_select:V2HF
(match_operand:V4HF 2 "register_operand" "w")
(match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
(float_extend:V2SF
(vec_duplicate:V2HF
(vec_select:HF
(match_operand:V8HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
(match_operand:V2SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlal\\t%0.2s, %2.2h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=w")
(fma:V2SF
(float_extend:V2SF
(neg:V2HF
(vec_select:V2HF
(match_operand:V4HF 2 "register_operand" "w")
(match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
(float_extend:V2SF
(vec_duplicate:V2HF
(vec_select:HF
(match_operand:V8HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
(match_operand:V2SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=w")
(fma:V2SF
(float_extend:V2SF
(vec_select:V2HF
(match_operand:V4HF 2 "register_operand" "w")
(match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
(float_extend:V2SF
(vec_duplicate:V2HF
(vec_select:HF
(match_operand:V8HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
(match_operand:V2SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=w")
(fma:V2SF
(float_extend:V2SF
(neg:V2HF
(vec_select:V2HF
(match_operand:V4HF 2 "register_operand" "w")
(match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
(float_extend:V2SF
(vec_duplicate:V2HF
(vec_select:HF
(match_operand:V8HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
(match_operand:V2SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
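;; The remaining expanders handle the *_lane_* builtins, whose
;; by-element operand is a 64-bit V4HF vector: the lane number is
;; therefore a two-bit immediate (the aarch64_imm2 predicate, 0..3)
;; and is remapped with aarch64_endian_lane_rtx (V4HFmode, ...) before
;; the insn is emitted, just as in the laneq cases above.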
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
[(set (match_operand:V4SF 0 "register_operand")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
(match_operand:V8HF 2 "register_operand")
(match_operand:V4HF 3 "register_operand")
(match_operand:SI 4 "aarch64_imm2")]
VFMLA16_LOW))]
"TARGET_F16FML"
{
rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})
(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
[(set (match_operand:V4SF 0 "register_operand")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
(match_operand:V8HF 2 "register_operand")
(match_operand:V4HF 3 "register_operand")
(match_operand:SI 4 "aarch64_imm2")]
         VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})
(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(fma:V4SF
(float_extend:V4SF
(vec_select:V4HF
(match_operand:V8HF 2 "register_operand" "w")
(match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
(float_extend:V4SF
(vec_duplicate:V4HF
(vec_select:HF
(match_operand:V4HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
(match_operand:V4SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlal\\t%0.4s, %2.4h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(fma:V4SF
(float_extend:V4SF
(neg:V4HF
(vec_select:V4HF
(match_operand:V8HF 2 "register_operand" "w")
(match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
(float_extend:V4SF
(vec_duplicate:V4HF
(vec_select:HF
(match_operand:V4HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
(match_operand:V4SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(fma:V4SF
(float_extend:V4SF
(vec_select:V4HF
(match_operand:V8HF 2 "register_operand" "w")
(match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
(float_extend:V4SF
(vec_duplicate:V4HF
(vec_select:HF
(match_operand:V4HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
(match_operand:V4SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(fma:V4SF
(float_extend:V4SF
(neg:V4HF
(vec_select:V4HF
(match_operand:V8HF 2 "register_operand" "w")
(match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
(float_extend:V4SF
(vec_duplicate:V4HF
(vec_select:HF
(match_operand:V4HF 3 "register_operand" "x")
(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
(match_operand:V4SF 1 "register_operand" "0")))]
"TARGET_F16FML"
"fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
[(set_attr "type" "neon_fp_mul_s")]
)
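
;; A usage sketch for the lane patterns above (assuming the ACLE fp16fml
;; spellings in arm_neon.h, e.g. vfmlalq_lane_low_f16; compile with
;; -march=armv8.2-a+fp16fml):
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   f (float32x4_t acc, float16x8_t a, float16x4_t b)
;;   {
;;     /* Multiply the low .4h of A by lane 1 of B, widen to single
;;        precision and accumulate: fmlal v0.4s, v1.4h, v2.h[1].  */
;;     return vfmlalq_lane_low_f16 (acc, a, b, 1);
;;   }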

;; pmull

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
                    (match_operand:DI 2 "register_operand" "w")]
         UNSPEC_PMULL))]
"TARGET_SIMD && TARGET_AES"
"pmull\\t%0.1q, %1.1d, %2.1d"
[(set_attr "type" "crypto_pmull")]
)
(define_insn "aarch64_crypto_pmullv2di"
[(set (match_operand:TI 0 "register_operand" "=w")
(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
(match_operand:V2DI 2 "register_operand" "w")]
UNSPEC_PMULL2))]
"TARGET_SIMD && TARGET_AES"
"pmull2\\t%0.1q, %1.2d, %2.2d"
[(set_attr "type" "crypto_pmull")]
)
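
;; A usage sketch for the two polynomial multiplies (ACLE vmull_p64 and
;; vmull_high_p64; compile with -march=armv8-a+aes):
;;
;;   #include <arm_neon.h>
;;
;;   poly128_t
;;   f (poly64_t a, poly64_t b)
;;   {
;;     return vmull_p64 (a, b);        /* pmull  v0.1q, v0.1d, v1.1d */
;;   }
;;
;;   poly128_t
;;   g (poly64x2_t a, poly64x2_t b)
;;   {
;;     return vmull_high_p64 (a, b);   /* pmull2 v0.1q, v0.2d, v1.2d */
;;   }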

;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
(define_insn "<optab><Vnarrowq><mode>2"
  [(set (match_operand:VQN 0 "register_operand" "=w")
        (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
  [(set_attr "type" "neon_shift_imm_long")]
)
(define_expand "aarch64_<su>xtl<mode>"
[(set (match_operand:VQN 0 "register_operand" "=w")
(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
"TARGET_SIMD"
""
)
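
;; Because the insn above implements the standard extend optab, the
;; vectoriser can widen integer elements directly; a sketch (with -O2
;; the inner loop can use sxtl/sxtl2 on the loaded vectors):
;;
;;   void
;;   f (int *d, short *s, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = s[i];   /* e.g. sxtl v1.4s, v0.4h */
;;   }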

;; Truncate a 128-bit integer vector to a 64-bit vector.
(define_insn "trunc<mode><Vnarrowq>2"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_move_narrow_q")]
)
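
;; The narrowing direction behaves the same way; as a sketch, the ACLE
;; vmovn intrinsics emit the same xtn instruction:
;;
;;   #include <arm_neon.h>
;;
;;   int16x4_t
;;   f (int32x4_t x)
;;   {
;;     return vmovn_s32 (x);   /* xtn v0.4h, v0.4s */
;;   }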
(define_insn "aarch64_bfdot<mode>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(plus:VDQSF
(unspec:VDQSF
[(match_operand:<VBFMLA_W> 2 "register_operand" "w")
(match_operand:<VBFMLA_W> 3 "register_operand" "w")]
UNSPEC_BFDOT)
(match_operand:VDQSF 1 "register_operand" "0")))]
"TARGET_BF16_SIMD"
"bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
[(set_attr "type" "neon_dot<q>")]
)
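
;; A usage sketch (ACLE vbfdotq_f32; compile with -march=armv8.2-a+bf16):
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   f (float32x4_t acc, bfloat16x8_t a, bfloat16x8_t b)
;;   {
;;     /* 2-way bfloat16 dot product per 32-bit lane:
;;        bfdot v0.4s, v1.8h, v2.8h.  */
;;     return vbfdotq_f32 (acc, a, b);
;;   }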
(define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
[(set (match_operand:VDQSF 0 "register_operand" "=w")
(plus:VDQSF
(unspec:VDQSF
[(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
(match_operand:VBF 3 "register_operand" "w")
(match_operand:SI 4 "const_int_operand" "n")]
UNSPEC_BFDOT)
(match_operand:VDQSF 1 "register_operand" "0")))]
"TARGET_BF16_SIMD"
{
int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
int lane = INTVAL (operands[4]);
operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
}
[(set_attr "type" "neon_dot<VDQSF:q>")]
)
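
;; The lane index counts pairs of bfloat16 elements, hence the nunits / 2
;; above; a sketch using the ACLE lane form (vbfdot_lane_f32, lane in
;; [0,1] for a 64-bit B):
;;
;;   #include <arm_neon.h>
;;
;;   float32x2_t
;;   f (float32x2_t acc, bfloat16x4_t a, bfloat16x4_t b)
;;   {
;;     return vbfdot_lane_f32 (acc, a, b, 1);  /* bfdot v0.2s, v1.4h, v2.2h[1] */
;;   }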

;; vget_low/high_bf16
(define_expand "aarch64_vget_lo_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})
(define_expand "aarch64_vget_hi_halfv8bf"
[(match_operand:V4BF 0 "register_operand")
(match_operand:V8BF 1 "register_operand")]
"TARGET_BF16_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
DONE;
})
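
;; A sketch of the corresponding intrinsics (ACLE vget_low_bf16 and
;; vget_high_bf16):
;;
;;   #include <arm_neon.h>
;;
;;   bfloat16x4_t lo (bfloat16x8_t x) { return vget_low_bf16 (x); }
;;   bfloat16x4_t hi (bfloat16x8_t x) { return vget_high_bf16 (x); }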

;; bfmmla
(define_insn "aarch64_bfmmlaqv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
                                 (match_operand:V8BF 3 "register_operand" "w")]
                    UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "bfmmla\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)
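
;; A usage sketch (ACLE vbfmmlaq_f32): each bfmmla accumulates a 2x2
;; single-precision matrix product of two 2x4 bfloat16 operands:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   f (float32x4_t acc, bfloat16x8_t a, bfloat16x8_t b)
;;   {
;;     return vbfmmlaq_f32 (acc, a, b);  /* bfmmla v0.4s, v1.8h, v2.8h */
;;   }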

;; bfmlal<bt>
(define_insn "aarch64_bfmlal<bt>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
                                 (match_operand:V8BF 3 "register_operand" "w")]
                    BF_MLA)))]
  "TARGET_BF16_SIMD"
  "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)
(define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
(unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
(match_operand:VBF 3 "register_operand" "w")
(match_operand:SI 4 "const_int_operand" "n")]
BF_MLA)))]
"TARGET_BF16_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
}
[(set_attr "type" "neon_fp_mla_s_scalar_q")]
)
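
;; A usage sketch (ACLE vbfmlalbq_f32 / vbfmlaltq_lane_f32): widen the
;; even (bottom) or odd (top) bfloat16 elements and multiply-accumulate:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   fb (float32x4_t acc, bfloat16x8_t a, bfloat16x8_t b)
;;   {
;;     return vbfmlalbq_f32 (acc, a, b);         /* bfmlalb v0.4s, v1.8h, v2.8h */
;;   }
;;
;;   float32x4_t
;;   ft (float32x4_t acc, bfloat16x8_t a, bfloat16x4_t b)
;;   {
;;     return vbfmlaltq_lane_f32 (acc, a, b, 2); /* bfmlalt v0.4s, v1.8h, v2.h[2] */
;;   }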

;; 8-bit integer matrix multiply-accumulate
(define_insn "aarch64_simd_<sur>mmlav16qi"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (plus:V4SI
          (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
                        (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
          (match_operand:V4SI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
  [(set_attr "type" "neon_mla_s_q")]
)
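
;; A usage sketch (ACLE vmmlaq_s32 / vusmmlaq_s32; compile with
;; -march=armv8.2-a+i8mm):
;;
;;   #include <arm_neon.h>
;;
;;   int32x4_t
;;   f (int32x4_t acc, int8x16_t a, int8x16_t b)
;;   {
;;     return vmmlaq_s32 (acc, a, b);    /* smmla  v0.4s, v1.16b, v2.16b */
;;   }
;;
;;   int32x4_t
;;   g (int32x4_t acc, uint8x16_t a, int8x16_t b)
;;   {
;;     return vusmmlaq_s32 (acc, a, b);  /* usmmla v0.4s, v1.16b, v2.16b */
;;   }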

;; bfcvtn
(define_insn "aarch64_bfcvtn<q><mode>"
  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
        (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
         UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "bfcvtn\\t%0.4h, %1.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
(define_insn "aarch64_bfcvtn2v8bf"
[(set (match_operand:V8BF 0 "register_operand" "=w")
(unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
(match_operand:V4SF 2 "register_operand" "w")]
UNSPEC_BFCVTN2))]
"TARGET_BF16_SIMD"
"bfcvtn2\\t%0.8h, %2.4s"
[(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
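
;; A usage sketch (ACLE vcvt_bf16_f32 and vcvtq_high_bf16_f32):
;;
;;   #include <arm_neon.h>
;;
;;   bfloat16x4_t
;;   narrow (float32x4_t x)
;;   {
;;     return vcvt_bf16_f32 (x);                 /* bfcvtn  v0.4h, v0.4s */
;;   }
;;
;;   bfloat16x8_t
;;   narrow_high (bfloat16x8_t inactive, float32x4_t x)
;;   {
;;     return vcvtq_high_bf16_f32 (inactive, x); /* bfcvtn2 v0.8h, v1.4s */
;;   }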
(define_insn "aarch64_bfcvtbf"
[(set (match_operand:BF 0 "register_operand" "=w")
(unspec:BF [(match_operand:SF 1 "register_operand" "w")]
UNSPEC_BFCVT))]
"TARGET_BF16_FP"
"bfcvt\\t%h0, %s1"
[(set_attr "type" "f_cvt")]
)
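
;; The scalar form maps to ACLE vcvth_bf16_f32 (a sketch):
;;
;;   #include <arm_neon.h>
;;
;;   bfloat16_t f (float32_t x) { return vcvth_bf16_f32 (x); }  /* bfcvt h0, s0 */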

;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
(define_insn "aarch64_vbfcvt<mode>"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
         UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "shll\\t%0.4s, %1.4h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)
(define_insn "aarch64_vbfcvt_highv8bf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
UNSPEC_BFCVTN2))]
"TARGET_BF16_SIMD"
"shll2\\t%0.4s, %1.8h, #16"
[(set_attr "type" "neon_shift_imm_long")]
)
(define_insn "aarch64_bfcvtsf"
[(set (match_operand:SF 0 "register_operand" "=w")
(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
UNSPEC_BFCVT))]
"TARGET_BF16_FP"
"shl\\t%d0, %d1, #16"
[(set_attr "type" "neon_shift_imm")]
)
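
;; Widening bfloat16 to float32 is exact: a bfloat16 value is the high
;; 16 bits of the equivalent binary32, so a 16-bit left shift of the bit
;; pattern suffices.  A sketch with the ACLE conversions (vcvt_f32_bf16
;; and vcvtah_f32_bf16):
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   widen (bfloat16x4_t x)
;;   {
;;     return vcvt_f32_bf16 (x);    /* shll v0.4s, v0.4h, #16 */
;;   }
;;
;;   float32_t
;;   widen1 (bfloat16_t x)
;;   {
;;     return vcvtah_f32_bf16 (x);  /* shl d0, d0, #16 */
;;   }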