bfaa08b7ba
This patch adds support for SVE gather loads. It uses basically the same analysis code as the AVX gather support, but after that there are two major differences: - It uses new internal functions rather than target built-ins. The interface is: IFN_GATHER_LOAD (base, offsets, scale) IFN_MASK_GATHER_LOAD (base, offsets, scale, mask) which should be reasonably generic. One of the advantages of using internal functions is that other passes can understand what the functions do, but a more immediate advantage is that we can query the underlying target pattern to see which scales it supports. - It uses pattern recognition to convert the offset to the right width, if it was originally narrower than that. This avoids having to do a widening operation as part of the gather expansion itself. 2018-01-13 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * doc/md.texi (gather_load@var{m}): Document. (mask_gather_load@var{m}): Likewise. * genopinit.c (main): Add supports_vec_gather_load and supports_vec_gather_load_cached to target_optabs. * optabs-tree.c (init_tree_optimization_optabs): Use ggc_cleared_alloc to allocate target_optabs. * optabs.def (gather_load_optab, mask_gather_load_optab): New optabs. * internal-fn.def (GATHER_LOAD, MASK_GATHER_LOAD): New internal functions. * internal-fn.h (internal_load_fn_p): Declare. (internal_gather_scatter_fn_p): Likewise. (internal_fn_mask_index): Likewise. (internal_gather_scatter_fn_supported_p): Likewise. * internal-fn.c (gather_load_direct): New macro. (expand_gather_load_optab_fn): New function. (direct_gather_load_optab_supported_p): New macro. (direct_internal_fn_optab): New function. (internal_load_fn_p): Likewise. (internal_gather_scatter_fn_p): Likewise. (internal_fn_mask_index): Likewise. (internal_gather_scatter_fn_supported_p): Likewise. * optabs-query.c (supports_at_least_one_mode_p): New function. (supports_vec_gather_load_p): Likewise. 
* optabs-query.h (supports_vec_gather_load_p): Declare. * tree-vectorizer.h (gather_scatter_info): Add ifn, element_type and memory_type field. (NUM_PATTERNS): Bump to 15. * tree-vect-data-refs.c: Include internal-fn.h. (vect_gather_scatter_fn_p): New function. (vect_describe_gather_scatter_call): Likewise. (vect_check_gather_scatter): Try using internal functions for gather loads. Recognize existing calls to a gather load function. (vect_analyze_data_refs): Consider using gather loads if supports_vec_gather_load_p. * tree-vect-patterns.c (vect_get_load_store_mask): New function. (vect_get_gather_scatter_offset_type): Likewise. (vect_convert_mask_for_vectype): Likewise. (vect_add_conversion_to_patterm): Likewise. (vect_try_gather_scatter_pattern): Likewise. (vect_recog_gather_scatter_pattern): New pattern recognizer. (vect_vect_recog_func_ptrs): Add it. * tree-vect-stmts.c (exist_non_indexing_operands_for_use_p): Use internal_fn_mask_index and internal_gather_scatter_fn_p. (check_load_store_masking): Take the gather_scatter_info as an argument and handle gather loads. (vect_get_gather_scatter_ops): New function. (vectorizable_call): Check internal_load_fn_p. (vectorizable_load): Likewise. Handle gather load internal functions. (vectorizable_store): Update call to check_load_store_masking. * config/aarch64/aarch64.md (UNSPEC_LD1_GATHER): New unspec. * config/aarch64/iterators.md (SVE_S, SVE_D): New mode iterators. * config/aarch64/predicates.md (aarch64_gather_scale_operand_w) (aarch64_gather_scale_operand_d): New predicates. * config/aarch64/aarch64-sve.md (gather_load<mode>): New expander. (mask_gather_load<mode>): New insns. gcc/testsuite/ * gcc.target/aarch64/sve/gather_load_1.c: New test. * gcc.target/aarch64/sve/gather_load_2.c: Likewise. * gcc.target/aarch64/sve/gather_load_3.c: Likewise. * gcc.target/aarch64/sve/gather_load_4.c: Likewise. * gcc.target/aarch64/sve/gather_load_5.c: Likewise. * gcc.target/aarch64/sve/gather_load_6.c: Likewise. 
* gcc.target/aarch64/sve/gather_load_7.c: Likewise. * gcc.target/aarch64/sve/mask_gather_load_1.c: Likewise. * gcc.target/aarch64/sve/mask_gather_load_2.c: Likewise. * gcc.target/aarch64/sve/mask_gather_load_3.c: Likewise. * gcc.target/aarch64/sve/mask_gather_load_4.c: Likewise. * gcc.target/aarch64/sve/mask_gather_load_5.c: Likewise. * gcc.target/aarch64/sve/mask_gather_load_6.c: Likewise. * gcc.target/aarch64/sve/mask_gather_load_7.c: Likewise. Co-Authored-By: Alan Hayward <alan.hayward@arm.com> Co-Authored-By: David Sherwood <david.sherwood@arm.com> From-SVN: r256640
390 lines
12 KiB
C
390 lines
12 KiB
C
/* Tree-based target query functions relating to optabs
|
|
Copyright (C) 1987-2018 Free Software Foundation, Inc.
|
|
|
|
This file is part of GCC.
|
|
|
|
GCC is free software; you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License as published by the Free
|
|
Software Foundation; either version 3, or (at your option) any later
|
|
version.
|
|
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GCC; see the file COPYING3. If not see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
|
|
#include "config.h"
|
|
#include "system.h"
|
|
#include "coretypes.h"
|
|
#include "target.h"
|
|
#include "insn-codes.h"
|
|
#include "tree.h"
|
|
#include "optabs-tree.h"
|
|
#include "stor-layout.h"
|
|
|
|
/* Return the optab used for computing the operation given by the tree code,
   CODE and the tree EXP.  This function is not always usable (for example, it
   cannot give complete results for multiplication or division) but probably
   ought to be relied on more widely throughout the expander.  */
optab
optab_for_tree_code (enum tree_code code, const_tree type,
		     enum optab_subtype subtype)
{
  bool trapv;
  switch (code)
    {
    case BIT_AND_EXPR:
      return and_optab;

    case BIT_IOR_EXPR:
      return ior_optab;

    case BIT_NOT_EXPR:
      return one_cmpl_optab;

    case BIT_XOR_EXPR:
      return xor_optab;

    case MULT_HIGHPART_EXPR:
      return TYPE_UNSIGNED (type) ? umul_highpart_optab : smul_highpart_optab;

    case TRUNC_MOD_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
      return TYPE_UNSIGNED (type) ? umod_optab : smod_optab;

    case RDIV_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
      if (TYPE_SATURATING (type))
	return TYPE_UNSIGNED (type) ? usdiv_optab : ssdiv_optab;
      return TYPE_UNSIGNED (type) ? udiv_optab : sdiv_optab;

    case LSHIFT_EXPR:
      /* For vector types, SUBTYPE selects between a shift by a vector of
	 per-element amounts (optab_vector) and a shift of every element by
	 the same scalar amount (optab_scalar, handled by the fall-through
	 code below).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	{
	  if (subtype == optab_vector)
	    return TYPE_SATURATING (type) ? unknown_optab : vashl_optab;

	  gcc_assert (subtype == optab_scalar);
	}
      if (TYPE_SATURATING (type))
	return TYPE_UNSIGNED (type) ? usashl_optab : ssashl_optab;
      return ashl_optab;

    case RSHIFT_EXPR:
      /* Same optab_vector/optab_scalar distinction as LSHIFT_EXPR.  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	{
	  if (subtype == optab_vector)
	    return TYPE_UNSIGNED (type) ? vlshr_optab : vashr_optab;

	  gcc_assert (subtype == optab_scalar);
	}
      return TYPE_UNSIGNED (type) ? lshr_optab : ashr_optab;

    case LROTATE_EXPR:
      if (TREE_CODE (type) == VECTOR_TYPE)
	{
	  if (subtype == optab_vector)
	    return vrotl_optab;

	  gcc_assert (subtype == optab_scalar);
	}
      return rotl_optab;

    case RROTATE_EXPR:
      if (TREE_CODE (type) == VECTOR_TYPE)
	{
	  if (subtype == optab_vector)
	    return vrotr_optab;

	  gcc_assert (subtype == optab_scalar);
	}
      return rotr_optab;

    case MAX_EXPR:
      return TYPE_UNSIGNED (type) ? umax_optab : smax_optab;

    case MIN_EXPR:
      return TYPE_UNSIGNED (type) ? umin_optab : smin_optab;

    case REALIGN_LOAD_EXPR:
      return vec_realign_load_optab;

    case WIDEN_SUM_EXPR:
      return TYPE_UNSIGNED (type) ? usum_widen_optab : ssum_widen_optab;

    case DOT_PROD_EXPR:
      return TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab;

    case SAD_EXPR:
      return TYPE_UNSIGNED (type) ? usad_optab : ssad_optab;

    case WIDEN_MULT_PLUS_EXPR:
      return (TYPE_UNSIGNED (type)
	      ? (TYPE_SATURATING (type)
		 ? usmadd_widen_optab : umadd_widen_optab)
	      : (TYPE_SATURATING (type)
		 ? ssmadd_widen_optab : smadd_widen_optab));

    case WIDEN_MULT_MINUS_EXPR:
      return (TYPE_UNSIGNED (type)
	      ? (TYPE_SATURATING (type)
		 ? usmsub_widen_optab : umsub_widen_optab)
	      : (TYPE_SATURATING (type)
		 ? ssmsub_widen_optab : smsub_widen_optab));

    case FMA_EXPR:
      return fma_optab;

    case VEC_WIDEN_MULT_HI_EXPR:
      return TYPE_UNSIGNED (type) ?
	vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;

    case VEC_WIDEN_MULT_LO_EXPR:
      return TYPE_UNSIGNED (type) ?
	vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      return TYPE_UNSIGNED (type) ?
	vec_widen_umult_even_optab : vec_widen_smult_even_optab;

    case VEC_WIDEN_MULT_ODD_EXPR:
      return TYPE_UNSIGNED (type) ?
	vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;

    case VEC_WIDEN_LSHIFT_HI_EXPR:
      return TYPE_UNSIGNED (type) ?
	vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab;

    case VEC_WIDEN_LSHIFT_LO_EXPR:
      return TYPE_UNSIGNED (type) ?
	vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab;

    case VEC_UNPACK_HI_EXPR:
      return TYPE_UNSIGNED (type) ?
	vec_unpacku_hi_optab : vec_unpacks_hi_optab;

    case VEC_UNPACK_LO_EXPR:
      return TYPE_UNSIGNED (type) ?
	vec_unpacku_lo_optab : vec_unpacks_lo_optab;

    case VEC_UNPACK_FLOAT_HI_EXPR:
      /* The signedness is determined from input operand.  */
      return TYPE_UNSIGNED (type) ?
	vec_unpacku_float_hi_optab : vec_unpacks_float_hi_optab;

    case VEC_UNPACK_FLOAT_LO_EXPR:
      /* The signedness is determined from input operand.  */
      return TYPE_UNSIGNED (type) ?
	vec_unpacku_float_lo_optab : vec_unpacks_float_lo_optab;

    case VEC_PACK_TRUNC_EXPR:
      return vec_pack_trunc_optab;

    case VEC_PACK_SAT_EXPR:
      return TYPE_UNSIGNED (type) ? vec_pack_usat_optab : vec_pack_ssat_optab;

    case VEC_PACK_FIX_TRUNC_EXPR:
      /* The signedness is determined from output operand.  */
      return TYPE_UNSIGNED (type) ?
	vec_pack_ufix_trunc_optab : vec_pack_sfix_trunc_optab;

    case VEC_DUPLICATE_EXPR:
      return vec_duplicate_optab;

    case VEC_SERIES_EXPR:
      return vec_series_optab;

    default:
      break;
    }

  /* The remaining codes pick a trapping variant when signed overflow
     traps for TYPE.  */
  trapv = INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type);
  switch (code)
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
      if (TYPE_SATURATING (type))
	return TYPE_UNSIGNED (type) ? usadd_optab : ssadd_optab;
      return trapv ? addv_optab : add_optab;

    case POINTER_DIFF_EXPR:
    case MINUS_EXPR:
      if (TYPE_SATURATING (type))
	return TYPE_UNSIGNED (type) ? ussub_optab : sssub_optab;
      return trapv ? subv_optab : sub_optab;

    case MULT_EXPR:
      if (TYPE_SATURATING (type))
	return TYPE_UNSIGNED (type) ? usmul_optab : ssmul_optab;
      return trapv ? smulv_optab : smul_optab;

    case NEGATE_EXPR:
      if (TYPE_SATURATING (type))
	return TYPE_UNSIGNED (type) ? usneg_optab : ssneg_optab;
      return trapv ? negv_optab : neg_optab;

    case ABS_EXPR:
      return trapv ? absv_optab : abs_optab;

    default:
      return unknown_optab;
    }
}
|
|
|
|
/* Function supportable_convert_operation
|
|
|
|
Check whether an operation represented by the code CODE is a
|
|
convert operation that is supported by the target platform in
|
|
vector form (i.e., when operating on arguments of type VECTYPE_IN
|
|
producing a result of type VECTYPE_OUT).
|
|
|
|
Convert operations we currently support directly are FIX_TRUNC and FLOAT.
|
|
This function checks if these operations are supported
|
|
by the target platform either directly (via vector tree-codes), or via
|
|
target builtins.
|
|
|
|
Output:
|
|
- CODE1 is code of vector operation to be used when
|
|
vectorizing the operation, if available.
|
|
- DECL is decl of target builtin functions to be used
|
|
when vectorizing the operation, if available. In this case,
|
|
CODE1 is CALL_EXPR. */
|
|
|
|
bool
|
|
supportable_convert_operation (enum tree_code code,
|
|
tree vectype_out, tree vectype_in,
|
|
tree *decl, enum tree_code *code1)
|
|
{
|
|
machine_mode m1,m2;
|
|
bool truncp;
|
|
|
|
m1 = TYPE_MODE (vectype_out);
|
|
m2 = TYPE_MODE (vectype_in);
|
|
|
|
/* First check if we can done conversion directly. */
|
|
if ((code == FIX_TRUNC_EXPR
|
|
&& can_fix_p (m1,m2,TYPE_UNSIGNED (vectype_out), &truncp)
|
|
!= CODE_FOR_nothing)
|
|
|| (code == FLOAT_EXPR
|
|
&& can_float_p (m1,m2,TYPE_UNSIGNED (vectype_in))
|
|
!= CODE_FOR_nothing))
|
|
{
|
|
*code1 = code;
|
|
return true;
|
|
}
|
|
|
|
/* Now check for builtin. */
|
|
if (targetm.vectorize.builtin_conversion
|
|
&& targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
|
|
{
|
|
*code1 = CALL_EXPR;
|
|
*decl = targetm.vectorize.builtin_conversion (code, vectype_out,
|
|
vectype_in);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/* Return TRUE if appropriate vector insn is available
|
|
for vector comparison expr with vector type VALUE_TYPE
|
|
and resulting mask with MASK_TYPE. */
|
|
|
|
bool
|
|
expand_vec_cmp_expr_p (tree value_type, tree mask_type, enum tree_code code)
|
|
{
|
|
if (get_vec_cmp_icode (TYPE_MODE (value_type), TYPE_MODE (mask_type),
|
|
TYPE_UNSIGNED (value_type)) != CODE_FOR_nothing)
|
|
return true;
|
|
if ((code == EQ_EXPR || code == NE_EXPR)
|
|
&& (get_vec_cmp_eq_icode (TYPE_MODE (value_type), TYPE_MODE (mask_type))
|
|
!= CODE_FOR_nothing))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
/* Return TRUE iff, appropriate vector insns are available
|
|
for vector cond expr with vector type VALUE_TYPE and a comparison
|
|
with operand vector types in CMP_OP_TYPE. */
|
|
|
|
bool
|
|
expand_vec_cond_expr_p (tree value_type, tree cmp_op_type, enum tree_code code)
|
|
{
|
|
machine_mode value_mode = TYPE_MODE (value_type);
|
|
machine_mode cmp_op_mode = TYPE_MODE (cmp_op_type);
|
|
if (VECTOR_BOOLEAN_TYPE_P (cmp_op_type)
|
|
&& get_vcond_mask_icode (TYPE_MODE (value_type),
|
|
TYPE_MODE (cmp_op_type)) != CODE_FOR_nothing)
|
|
return true;
|
|
|
|
if (maybe_ne (GET_MODE_SIZE (value_mode), GET_MODE_SIZE (cmp_op_mode))
|
|
|| maybe_ne (GET_MODE_NUNITS (value_mode), GET_MODE_NUNITS (cmp_op_mode)))
|
|
return false;
|
|
|
|
if (get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type),
|
|
TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing
|
|
&& ((code != EQ_EXPR && code != NE_EXPR)
|
|
|| get_vcond_eq_icode (TYPE_MODE (value_type),
|
|
TYPE_MODE (cmp_op_type)) == CODE_FOR_nothing))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Use the current target and options to initialize
|
|
TREE_OPTIMIZATION_OPTABS (OPTNODE). */
|
|
|
|
void
|
|
init_tree_optimization_optabs (tree optnode)
|
|
{
|
|
/* Quick exit if we have already computed optabs for this target. */
|
|
if (TREE_OPTIMIZATION_BASE_OPTABS (optnode) == this_target_optabs)
|
|
return;
|
|
|
|
/* Forget any previous information and set up for the current target. */
|
|
TREE_OPTIMIZATION_BASE_OPTABS (optnode) = this_target_optabs;
|
|
struct target_optabs *tmp_optabs = (struct target_optabs *)
|
|
TREE_OPTIMIZATION_OPTABS (optnode);
|
|
if (tmp_optabs)
|
|
memset (tmp_optabs, 0, sizeof (struct target_optabs));
|
|
else
|
|
tmp_optabs = ggc_cleared_alloc<target_optabs> ();
|
|
|
|
/* Generate a new set of optabs into tmp_optabs. */
|
|
init_all_optabs (tmp_optabs);
|
|
|
|
/* If the optabs changed, record it. */
|
|
if (memcmp (tmp_optabs, this_target_optabs, sizeof (struct target_optabs)))
|
|
TREE_OPTIMIZATION_OPTABS (optnode) = tmp_optabs;
|
|
else
|
|
{
|
|
TREE_OPTIMIZATION_OPTABS (optnode) = NULL;
|
|
ggc_free (tmp_optabs);
|
|
}
|
|
}
|
|
|
|
/* Return TRUE if the target has support for vector right shift of an
|
|
operand of type TYPE. If OT_TYPE is OPTAB_DEFAULT, check for existence
|
|
of a shift by either a scalar or a vector. Otherwise, check only
|
|
for a shift that matches OT_TYPE. */
|
|
|
|
bool
|
|
target_supports_op_p (tree type, enum tree_code code,
|
|
enum optab_subtype ot_subtype)
|
|
{
|
|
optab ot = optab_for_tree_code (code, type, ot_subtype);
|
|
return (ot != unknown_optab
|
|
&& optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing);
|
|
}
|
|
|